xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 9566063d113dddea24716c546802770db7481bc0)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
145a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1581824310SBarry Smith {
165a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1781824310SBarry Smith 
18362febeeSStefano Zampini   PetscFunctionBegin;
195a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
20bf0cc555SLisandro Dalcin   if (aijcrl) {
21*9566063dSJacob Faibussowitsch     PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols));
22bf0cc555SLisandro Dalcin   }
23*9566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->spptr));
24*9566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
25*9566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
2681824310SBarry Smith   PetscFunctionReturn(0);
2781824310SBarry Smith }
2881824310SBarry Smith 
295a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3081824310SBarry Smith {
315a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3281824310SBarry Smith }
3381824310SBarry Smith 
3496b95a6bSBarry Smith PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
3581824310SBarry Smith {
3681824310SBarry Smith   Mat_SeqAIJ     *a      = (Mat_SeqAIJ*)(A)->data;
375a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
38d0f46423SBarry Smith   PetscInt       m       = A->rmap->n; /* Number of rows in the matrix. */
3981824310SBarry Smith   PetscInt       *aj     = a->j; /* From the CSR representation; points to the beginning  of each row. */
4081824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
41dd6ea824SBarry Smith   MatScalar      *aa = a->a;
42dd6ea824SBarry Smith   PetscScalar    *acols;
4381824310SBarry Smith 
4481824310SBarry Smith   PetscFunctionBegin;
455a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
465a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
475a11e1b2SBarry Smith   aijcrl->rmax = rmax;
482205254eSKarl Rupp 
49*9566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aijcrl->acols,aijcrl->icols));
50*9566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols));
515a11e1b2SBarry Smith   acols = aijcrl->acols;
525a11e1b2SBarry Smith   icols = aijcrl->icols;
5381824310SBarry Smith   for (i=0; i<m; i++) {
5481824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
5581824310SBarry Smith       acols[j*m+i] = *aa++;
5681824310SBarry Smith       icols[j*m+i] = *aj++;
5781824310SBarry Smith     }
5881824310SBarry Smith     for (; j<rmax; j++) { /* empty column entries */
5981824310SBarry Smith       acols[j*m+i] = 0.0;
6081824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6181824310SBarry Smith     }
6281824310SBarry Smith   }
63*9566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax));
6481824310SBarry Smith   PetscFunctionReturn(0);
6581824310SBarry Smith }
6681824310SBarry Smith 
675a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
6881824310SBarry Smith {
6981824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
7081824310SBarry Smith 
71c02b24c5SBarry Smith   PetscFunctionBegin;
7281824310SBarry Smith   a->inode.use = PETSC_FALSE;
732205254eSKarl Rupp 
74*9566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A,mode));
754723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
7681824310SBarry Smith 
7781824310SBarry Smith   /* Now calculate the permutation and grouping information. */
78*9566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCRL_create_aijcrl(A));
7981824310SBarry Smith   PetscFunctionReturn(0);
8081824310SBarry Smith }
8181824310SBarry Smith 
82c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
835aeacdfdSBarry Smith 
844723c594SBarry Smith /*
855a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
864723c594SBarry Smith     - the scatter is used only in the parallel version
874723c594SBarry Smith 
884723c594SBarry Smith */
895a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
9081824310SBarry Smith {
915a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL*) A->spptr;
925a11e1b2SBarry Smith   PetscInt          m       = aijcrl->m; /* Number of rows in the matrix. */
935a11e1b2SBarry Smith   PetscInt          rmax    = aijcrl->rmax,*icols = aijcrl->icols;
945a11e1b2SBarry Smith   PetscScalar       *acols  = aijcrl->acols;
95d9ca1df4SBarry Smith   PetscScalar       *y;
96d9ca1df4SBarry Smith   const PetscScalar *x;
9781824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
9881824310SBarry Smith   PetscInt          i,j,ii;
9981824310SBarry Smith #endif
10081824310SBarry Smith 
10181824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
10281824310SBarry Smith #pragma disjoint(*x,*y,*aa)
10381824310SBarry Smith #endif
10481824310SBarry Smith 
10581824310SBarry Smith   PetscFunctionBegin;
1065a11e1b2SBarry Smith   if (aijcrl->xscat) {
107*9566063dSJacob Faibussowitsch     PetscCall(VecCopy(xx,aijcrl->xwork));
108c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
109*9566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD));
110*9566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD));
1115a11e1b2SBarry Smith     xx   = aijcrl->xwork;
1122205254eSKarl Rupp   }
113c02b24c5SBarry Smith 
114*9566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx,&x));
115*9566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy,&y));
11681824310SBarry Smith 
11781824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11881824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
11981824310SBarry Smith #else
12081824310SBarry Smith 
1214be65460SBarry Smith   /* first column */
1222205254eSKarl Rupp   for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]];
1234be65460SBarry Smith 
1244be65460SBarry Smith   /* other columns */
125b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
12681824310SBarry Smith #pragma _CRI preferstream
12781824310SBarry Smith #endif
1284be65460SBarry Smith   for (i=1; i<rmax; i++) {
12981824310SBarry Smith     ii = i*m;
130b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13181824310SBarry Smith #pragma _CRI prefervector
13281824310SBarry Smith #endif
1332205254eSKarl Rupp     for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
13481824310SBarry Smith   }
135b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13681824310SBarry Smith #pragma _CRI ivdep
13781824310SBarry Smith #endif
13881824310SBarry Smith 
13981824310SBarry Smith #endif
140*9566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0*aijcrl->nz - m));
141*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx,&x));
142*9566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy,&y));
14381824310SBarry Smith   PetscFunctionReturn(0);
14481824310SBarry Smith }
14581824310SBarry Smith 
1465a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1475a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
14881824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1495a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
150cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
15181824310SBarry Smith {
15281824310SBarry Smith   Mat            B = *newmat;
1535a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
1544099cc6bSBarry Smith   PetscBool      sametype;
15581824310SBarry Smith 
15681824310SBarry Smith   PetscFunctionBegin;
15781824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
158*9566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
15981824310SBarry Smith   }
160*9566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1614099cc6bSBarry Smith   if (sametype) PetscFunctionReturn(0);
16281824310SBarry Smith 
163*9566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B,&aijcrl));
1645a11e1b2SBarry Smith   B->spptr = (void*) aijcrl;
16581824310SBarry Smith 
16617667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1675a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1685a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1695a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1705a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
17181824310SBarry Smith 
17281824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1734eeb8337SBarry Smith   if (A->assembled) {
174*9566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJCRL_create_aijcrl(B));
17581824310SBarry Smith   }
176*9566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL));
17781824310SBarry Smith   *newmat = B;
17881824310SBarry Smith   PetscFunctionReturn(0);
17981824310SBarry Smith }
18081824310SBarry Smith 
18181824310SBarry Smith /*@C
1825a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
18381824310SBarry Smith    This type inherits from AIJ, but stores some additional
18481824310SBarry Smith    information that is used to allow better vectorization of
18581824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
18681824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
18781824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
18881824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
18981824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
19081824310SBarry Smith    performance.
19181824310SBarry Smith 
192d083f849SBarry Smith    Collective
19381824310SBarry Smith 
19481824310SBarry Smith    Input Parameters:
19581824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
19681824310SBarry Smith .  m - number of rows
19781824310SBarry Smith .  n - number of columns
19881824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
19981824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
2000298fd71SBarry Smith          (possibly different for each row) or NULL
20181824310SBarry Smith 
20281824310SBarry Smith    Output Parameter:
20381824310SBarry Smith .  A - the matrix
20481824310SBarry Smith 
20581824310SBarry Smith    Notes:
20681824310SBarry Smith    If nnz is given then nz is ignored
20781824310SBarry Smith 
20881824310SBarry Smith    Level: intermediate
20981824310SBarry Smith 
2105a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
21181824310SBarry Smith @*/
2127087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
21381824310SBarry Smith {
21481824310SBarry Smith   PetscFunctionBegin;
215*9566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm,A));
216*9566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A,m,n,m,n));
217*9566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A,MATSEQAIJCRL));
218*9566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
21981824310SBarry Smith   PetscFunctionReturn(0);
22081824310SBarry Smith }
22181824310SBarry Smith 
2228cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
22381824310SBarry Smith {
22481824310SBarry Smith   PetscFunctionBegin;
225*9566063dSJacob Faibussowitsch   PetscCall(MatSetType(A,MATSEQAIJ));
226*9566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_INPLACE_MATRIX,&A));
22781824310SBarry Smith   PetscFunctionReturn(0);
22881824310SBarry Smith }
229