xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 4eeb83376500adddb63404cb4ec47dfc83fb8ca7)
181824310SBarry Smith #define PETSCMAT_DLL
281824310SBarry Smith 
381824310SBarry Smith /*
481824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
581824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
681824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
781824310SBarry Smith   it with a column oriented storage that is more efficient for
881824310SBarry Smith   matrix vector products on Vector machines.
981824310SBarry Smith 
1081824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1181824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1281824310SBarry Smith */
137c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/crl/crl.h"
1481824310SBarry Smith 
1581824310SBarry Smith #undef __FUNCT__
1681824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL"
1781824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A)
1881824310SBarry Smith {
1981824310SBarry Smith   PetscErrorCode ierr;
20c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL *) A->spptr;
2181824310SBarry Smith 
2281824310SBarry Smith   /* We are going to convert A back into a SEQAIJ matrix, since we are
23c02b24c5SBarry Smith    * eventually going to use MatDestroy() to destroy everything
2481824310SBarry Smith    * that is not specific to CRL.
2581824310SBarry Smith    * In preparation for this, reset the operations pointers in A to
2681824310SBarry Smith    * their SeqAIJ versions. */
27c02b24c5SBarry Smith   A->ops->assemblyend = crl->AssemblyEnd;
28c02b24c5SBarry Smith   A->ops->destroy     = crl->MatDestroy;
29c02b24c5SBarry Smith   A->ops->duplicate   = crl->MatDuplicate;
3081824310SBarry Smith 
31c02b24c5SBarry Smith   /* Free everything in the Mat_CRL data structure. */
3281824310SBarry Smith   ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr);
3305b42c5fSBarry Smith 
34c02b24c5SBarry Smith   /* Change the type of A back to SEQAIJ and use MatDestroy()
3581824310SBarry Smith    * to destroy everything that remains. */
3681824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
3781824310SBarry Smith   /* Note that I don't call MatSetType().  I believe this is because that
3881824310SBarry Smith    * is only to be called when *building* a matrix. */
3981824310SBarry Smith   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
4081824310SBarry Smith   PetscFunctionReturn(0);
4181824310SBarry Smith }
4281824310SBarry Smith 
43c02b24c5SBarry Smith PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M)
4481824310SBarry Smith {
4581824310SBarry Smith   PetscErrorCode ierr;
46c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL *) A->spptr;
4781824310SBarry Smith 
4881824310SBarry Smith   PetscFunctionBegin;
49c02b24c5SBarry Smith   ierr = (*crl->MatDuplicate)(A,op,M);CHKERRQ(ierr);
5081824310SBarry Smith   SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet");
5181824310SBarry Smith   PetscFunctionReturn(0);
5281824310SBarry Smith }
5381824310SBarry Smith 
5481824310SBarry Smith #undef __FUNCT__
5581824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl"
5681824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A)
5781824310SBarry Smith {
5881824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
59c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
60d0f46423SBarry Smith   PetscInt       m = A->rmap->n;  /* Number of rows in the matrix. */
6181824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
6281824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
63dd6ea824SBarry Smith   MatScalar      *aa = a->a;
64dd6ea824SBarry Smith   PetscScalar    *acols;
6581824310SBarry Smith   PetscErrorCode ierr;
6681824310SBarry Smith 
6781824310SBarry Smith   PetscFunctionBegin;
68c02b24c5SBarry Smith   crl->nz   = a->nz;
69d0f46423SBarry Smith   crl->m    = A->rmap->n;
70c02b24c5SBarry Smith   crl->rmax = rmax;
7181824310SBarry Smith   ierr  = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr);
7281824310SBarry Smith   acols = crl->acols;
7381824310SBarry Smith   icols = crl->icols;
7481824310SBarry Smith   for (i=0; i<m; i++) {
7581824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
7681824310SBarry Smith       acols[j*m+i] = *aa++;
7781824310SBarry Smith       icols[j*m+i] = *aj++;
7881824310SBarry Smith     }
7981824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
8081824310SBarry Smith       acols[j*m+i] = 0.0;
8181824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
8281824310SBarry Smith     }
8381824310SBarry Smith   }
84ae15b995SBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
8581824310SBarry Smith   PetscFunctionReturn(0);
8681824310SBarry Smith }
8781824310SBarry Smith 
8881824310SBarry Smith #undef __FUNCT__
8981824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL"
9081824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode)
9181824310SBarry Smith {
9281824310SBarry Smith   PetscErrorCode ierr;
93c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
9481824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
9581824310SBarry Smith 
96c02b24c5SBarry Smith   PetscFunctionBegin;
9781824310SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
9881824310SBarry Smith 
9981824310SBarry Smith   /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some
10081824310SBarry Smith    * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
10181824310SBarry Smith    * I'm not sure if this is the best way to do this, but it avoids
10281824310SBarry Smith    * a lot of code duplication.
10381824310SBarry Smith    * I also note that currently MATSEQCRL doesn't know anything about
10481824310SBarry Smith    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
10581824310SBarry Smith    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
10681824310SBarry Smith    * this, this may break things.  (Don't know... haven't looked at it.) */
10781824310SBarry Smith   a->inode.use = PETSC_FALSE;
108c02b24c5SBarry Smith   (*crl->AssemblyEnd)(A, mode);
10981824310SBarry Smith 
11081824310SBarry Smith   /* Now calculate the permutation and grouping information. */
11181824310SBarry Smith   ierr = SeqCRL_create_crl(A);CHKERRQ(ierr);
11281824310SBarry Smith   PetscFunctionReturn(0);
11381824310SBarry Smith }
11481824310SBarry Smith 
1157c4f633dSBarry Smith #include "../src/inline/dot.h"
1165aeacdfdSBarry Smith 
11781824310SBarry Smith #undef __FUNCT__
118c02b24c5SBarry Smith #define __FUNCT__ "MatMult_CRL"
119c02b24c5SBarry Smith PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy)
12081824310SBarry Smith {
121c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
122c02b24c5SBarry Smith   PetscInt       m = crl->m;  /* Number of rows in the matrix. */
123c02b24c5SBarry Smith   PetscInt       rmax = crl->rmax,*icols = crl->icols;
12481824310SBarry Smith   PetscScalar    *acols = crl->acols;
12581824310SBarry Smith   PetscErrorCode ierr;
12681824310SBarry Smith   PetscScalar    *x,*y;
12781824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
12881824310SBarry Smith   PetscInt       i,j,ii;
12981824310SBarry Smith #endif
13081824310SBarry Smith 
13181824310SBarry Smith 
13281824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
13381824310SBarry Smith #pragma disjoint(*x,*y,*aa)
13481824310SBarry Smith #endif
13581824310SBarry Smith 
13681824310SBarry Smith   PetscFunctionBegin;
137c02b24c5SBarry Smith   if (crl->xscat) {
138c02b24c5SBarry Smith     ierr = VecCopy(xx,crl->xwork);CHKERRQ(ierr);
139c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
140ca9f406cSSatish Balay     ierr = VecScatterBegin(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
141ca9f406cSSatish Balay     ierr = VecScatterEnd(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
142c02b24c5SBarry Smith     xx = crl->xwork;
143c02b24c5SBarry Smith   };
144c02b24c5SBarry Smith 
14581824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14681824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
14781824310SBarry Smith 
14881824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
14981824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
15081824310SBarry Smith #else
15181824310SBarry Smith 
1524be65460SBarry Smith   /* first column */
1534be65460SBarry Smith   for (j=0; j<m; j++) {
1544be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1554be65460SBarry Smith   }
1564be65460SBarry Smith 
1574be65460SBarry Smith   /* other columns */
15881824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
15981824310SBarry Smith #pragma _CRI preferstream
16081824310SBarry Smith #endif
1614be65460SBarry Smith   for (i=1; i<rmax; i++) {
16281824310SBarry Smith     ii = i*m;
16381824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
16481824310SBarry Smith #pragma _CRI prefervector
16581824310SBarry Smith #endif
16681824310SBarry Smith     for (j=0; j<m; j++) {
16781824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
16881824310SBarry Smith     }
16981824310SBarry Smith   }
17081824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
17181824310SBarry Smith #pragma _CRI ivdep
17281824310SBarry Smith #endif
17381824310SBarry Smith 
17481824310SBarry Smith #endif
175483e0693SBarry Smith   ierr = PetscLogFlops(2*crl->nz - m);CHKERRQ(ierr);
17681824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
17781824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
17881824310SBarry Smith   PetscFunctionReturn(0);
17981824310SBarry Smith }
18081824310SBarry Smith 
18181824310SBarry Smith 
18281824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a
18381824310SBarry Smith  * SeqCRL matrix.  This routine is called by the MatCreate_SeqCRL()
18481824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
18581824310SBarry Smith  * into a SeqCRL one. */
18681824310SBarry Smith EXTERN_C_BEGIN
18781824310SBarry Smith #undef __FUNCT__
18881824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL"
1898cf70c4bSSatish Balay PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
19081824310SBarry Smith {
19181824310SBarry Smith   PetscErrorCode ierr;
19281824310SBarry Smith   Mat            B = *newmat;
193c02b24c5SBarry Smith   Mat_CRL        *crl;
19481824310SBarry Smith 
19581824310SBarry Smith   PetscFunctionBegin;
19681824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
19781824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
19881824310SBarry Smith   }
19981824310SBarry Smith 
20038f2d2fdSLisandro Dalcin   ierr = PetscNewLog(B,Mat_CRL,&crl);CHKERRQ(ierr);
20181824310SBarry Smith   B->spptr = (void *) crl;
20281824310SBarry Smith 
203c02b24c5SBarry Smith   crl->AssemblyEnd  = A->ops->assemblyend;
204c02b24c5SBarry Smith   crl->MatDestroy   = A->ops->destroy;
205c02b24c5SBarry Smith   crl->MatDuplicate = A->ops->duplicate;
20681824310SBarry Smith 
20717667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
208c02b24c5SBarry Smith   B->ops->duplicate   = MatDuplicate_CRL;
20981824310SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqCRL;
21081824310SBarry Smith   B->ops->destroy     = MatDestroy_SeqCRL;
211c02b24c5SBarry Smith   B->ops->mult        = MatMult_CRL;
21281824310SBarry Smith 
21381824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
214*4eeb8337SBarry Smith   if (A->assembled) {
21581824310SBarry Smith     ierr = SeqCRL_create_crl(B);CHKERRQ(ierr);
21681824310SBarry Smith   }
21781824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr);
21881824310SBarry Smith   *newmat = B;
21981824310SBarry Smith   PetscFunctionReturn(0);
22081824310SBarry Smith }
22181824310SBarry Smith EXTERN_C_END
22281824310SBarry Smith 
22381824310SBarry Smith 
22481824310SBarry Smith #undef __FUNCT__
22581824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL"
22681824310SBarry Smith /*@C
22781824310SBarry Smith    MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL.
22881824310SBarry Smith    This type inherits from AIJ, but stores some additional
22981824310SBarry Smith    information that is used to allow better vectorization of
23081824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
23181824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
23281824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
23381824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
23481824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
23581824310SBarry Smith    performance.
23681824310SBarry Smith 
23781824310SBarry Smith    Collective on MPI_Comm
23881824310SBarry Smith 
23981824310SBarry Smith    Input Parameters:
24081824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
24181824310SBarry Smith .  m - number of rows
24281824310SBarry Smith .  n - number of columns
24381824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
24481824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
24581824310SBarry Smith          (possibly different for each row) or PETSC_NULL
24681824310SBarry Smith 
24781824310SBarry Smith    Output Parameter:
24881824310SBarry Smith .  A - the matrix
24981824310SBarry Smith 
25081824310SBarry Smith    Notes:
25181824310SBarry Smith    If nnz is given then nz is ignored
25281824310SBarry Smith 
25381824310SBarry Smith    Level: intermediate
25481824310SBarry Smith 
25581824310SBarry Smith .keywords: matrix, cray, sparse, parallel
25681824310SBarry Smith 
25781824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
25881824310SBarry Smith @*/
25981824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
26081824310SBarry Smith {
26181824310SBarry Smith   PetscErrorCode ierr;
26281824310SBarry Smith 
26381824310SBarry Smith   PetscFunctionBegin;
26481824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
26581824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
26681824310SBarry Smith   ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr);
26781824310SBarry Smith   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
26881824310SBarry Smith   PetscFunctionReturn(0);
26981824310SBarry Smith }
27081824310SBarry Smith 
27181824310SBarry Smith 
27281824310SBarry Smith EXTERN_C_BEGIN
27381824310SBarry Smith #undef __FUNCT__
27481824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL"
27581824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A)
27681824310SBarry Smith {
27781824310SBarry Smith   PetscErrorCode ierr;
27881824310SBarry Smith 
27981824310SBarry Smith   PetscFunctionBegin;
28081824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
28181824310SBarry Smith   ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
28281824310SBarry Smith   PetscFunctionReturn(0);
28381824310SBarry Smith }
28481824310SBarry Smith EXTERN_C_END
28581824310SBarry Smith 
286