xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 362febeeeb69b91ebadcb4b2dc0a22cb6dfc4097)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
145a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1581824310SBarry Smith {
1681824310SBarry Smith   PetscErrorCode ierr;
175a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1881824310SBarry Smith 
19*362febeeSStefano Zampini   PetscFunctionBegin;
205a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
21bf0cc555SLisandro Dalcin   if (aijcrl) {
225a11e1b2SBarry Smith     ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
23bf0cc555SLisandro Dalcin   }
24c31cb41cSBarry Smith   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2581824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
264723c594SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2781824310SBarry Smith   PetscFunctionReturn(0);
2881824310SBarry Smith }
2981824310SBarry Smith 
305a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3181824310SBarry Smith {
325a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3381824310SBarry Smith }
3481824310SBarry Smith 
3596b95a6bSBarry Smith PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
3681824310SBarry Smith {
3781824310SBarry Smith   Mat_SeqAIJ     *a      = (Mat_SeqAIJ*)(A)->data;
385a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
39d0f46423SBarry Smith   PetscInt       m       = A->rmap->n; /* Number of rows in the matrix. */
4081824310SBarry Smith   PetscInt       *aj     = a->j; /* From the CSR representation; points to the beginning  of each row. */
4181824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
42dd6ea824SBarry Smith   MatScalar      *aa = a->a;
43dd6ea824SBarry Smith   PetscScalar    *acols;
4481824310SBarry Smith   PetscErrorCode ierr;
4581824310SBarry Smith 
4681824310SBarry Smith   PetscFunctionBegin;
475a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
485a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
495a11e1b2SBarry Smith   aijcrl->rmax = rmax;
502205254eSKarl Rupp 
515a11e1b2SBarry Smith   ierr  = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
52dcca6d9dSJed Brown   ierr  = PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols);CHKERRQ(ierr);
535a11e1b2SBarry Smith   acols = aijcrl->acols;
545a11e1b2SBarry Smith   icols = aijcrl->icols;
5581824310SBarry Smith   for (i=0; i<m; i++) {
5681824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
5781824310SBarry Smith       acols[j*m+i] = *aa++;
5881824310SBarry Smith       icols[j*m+i] = *aj++;
5981824310SBarry Smith     }
6081824310SBarry Smith     for (; j<rmax; j++) { /* empty column entries */
6181824310SBarry Smith       acols[j*m+i] = 0.0;
6281824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6381824310SBarry Smith     }
6481824310SBarry Smith   }
65a2ea699eSBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);CHKERRQ(ierr);
6681824310SBarry Smith   PetscFunctionReturn(0);
6781824310SBarry Smith }
6881824310SBarry Smith 
695a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
7081824310SBarry Smith {
7181824310SBarry Smith   PetscErrorCode ierr;
7281824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
7381824310SBarry Smith 
74c02b24c5SBarry Smith   PetscFunctionBegin;
7581824310SBarry Smith   a->inode.use = PETSC_FALSE;
762205254eSKarl Rupp 
774723c594SBarry Smith   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
784723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
7981824310SBarry Smith 
8081824310SBarry Smith   /* Now calculate the permutation and grouping information. */
8196b95a6bSBarry Smith   ierr = MatSeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
8281824310SBarry Smith   PetscFunctionReturn(0);
8381824310SBarry Smith }
8481824310SBarry Smith 
85c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
865aeacdfdSBarry Smith 
874723c594SBarry Smith /*
885a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
894723c594SBarry Smith     - the scatter is used only in the parallel version
904723c594SBarry Smith 
914723c594SBarry Smith */
925a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
9381824310SBarry Smith {
945a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL*) A->spptr;
955a11e1b2SBarry Smith   PetscInt          m       = aijcrl->m; /* Number of rows in the matrix. */
965a11e1b2SBarry Smith   PetscInt          rmax    = aijcrl->rmax,*icols = aijcrl->icols;
975a11e1b2SBarry Smith   PetscScalar       *acols  = aijcrl->acols;
9881824310SBarry Smith   PetscErrorCode    ierr;
99d9ca1df4SBarry Smith   PetscScalar       *y;
100d9ca1df4SBarry Smith   const PetscScalar *x;
10181824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
10281824310SBarry Smith   PetscInt          i,j,ii;
10381824310SBarry Smith #endif
10481824310SBarry Smith 
10581824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
10681824310SBarry Smith #pragma disjoint(*x,*y,*aa)
10781824310SBarry Smith #endif
10881824310SBarry Smith 
10981824310SBarry Smith   PetscFunctionBegin;
1105a11e1b2SBarry Smith   if (aijcrl->xscat) {
1115a11e1b2SBarry Smith     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
112c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1135a11e1b2SBarry Smith     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145a11e1b2SBarry Smith     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155a11e1b2SBarry Smith     xx   = aijcrl->xwork;
1162205254eSKarl Rupp   }
117c02b24c5SBarry Smith 
118d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
11981824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
12081824310SBarry Smith 
12181824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
12281824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
12381824310SBarry Smith #else
12481824310SBarry Smith 
1254be65460SBarry Smith   /* first column */
1262205254eSKarl Rupp   for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]];
1274be65460SBarry Smith 
1284be65460SBarry Smith   /* other columns */
129b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13081824310SBarry Smith #pragma _CRI preferstream
13181824310SBarry Smith #endif
1324be65460SBarry Smith   for (i=1; i<rmax; i++) {
13381824310SBarry Smith     ii = i*m;
134b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13581824310SBarry Smith #pragma _CRI prefervector
13681824310SBarry Smith #endif
1372205254eSKarl Rupp     for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
13881824310SBarry Smith   }
139b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14081824310SBarry Smith #pragma _CRI ivdep
14181824310SBarry Smith #endif
14281824310SBarry Smith 
14381824310SBarry Smith #endif
1445a11e1b2SBarry Smith   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
145d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14681824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14781824310SBarry Smith   PetscFunctionReturn(0);
14881824310SBarry Smith }
14981824310SBarry Smith 
1505a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1515a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
15281824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1535a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
154cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
15581824310SBarry Smith {
15681824310SBarry Smith   PetscErrorCode ierr;
15781824310SBarry Smith   Mat            B = *newmat;
1585a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
1594099cc6bSBarry Smith   PetscBool      sametype;
16081824310SBarry Smith 
16181824310SBarry Smith   PetscFunctionBegin;
16281824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
16381824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
16481824310SBarry Smith   }
1654099cc6bSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1664099cc6bSBarry Smith   if (sametype) PetscFunctionReturn(0);
16781824310SBarry Smith 
168b00a9115SJed Brown   ierr     = PetscNewLog(B,&aijcrl);CHKERRQ(ierr);
1695a11e1b2SBarry Smith   B->spptr = (void*) aijcrl;
17081824310SBarry Smith 
17117667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1725a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1735a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1745a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1755a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
17681824310SBarry Smith 
17781824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1784eeb8337SBarry Smith   if (A->assembled) {
17996b95a6bSBarry Smith     ierr = MatSeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
18081824310SBarry Smith   }
1815a11e1b2SBarry Smith   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
18281824310SBarry Smith   *newmat = B;
18381824310SBarry Smith   PetscFunctionReturn(0);
18481824310SBarry Smith }
18581824310SBarry Smith 
18681824310SBarry Smith /*@C
1875a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
18881824310SBarry Smith    This type inherits from AIJ, but stores some additional
18981824310SBarry Smith    information that is used to allow better vectorization of
19081824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
19181824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
19281824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
19381824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
19481824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
19581824310SBarry Smith    performance.
19681824310SBarry Smith 
197d083f849SBarry Smith    Collective
19881824310SBarry Smith 
19981824310SBarry Smith    Input Parameters:
20081824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
20181824310SBarry Smith .  m - number of rows
20281824310SBarry Smith .  n - number of columns
20381824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
20481824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
2050298fd71SBarry Smith          (possibly different for each row) or NULL
20681824310SBarry Smith 
20781824310SBarry Smith    Output Parameter:
20881824310SBarry Smith .  A - the matrix
20981824310SBarry Smith 
21081824310SBarry Smith    Notes:
21181824310SBarry Smith    If nnz is given then nz is ignored
21281824310SBarry Smith 
21381824310SBarry Smith    Level: intermediate
21481824310SBarry Smith 
2155a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
21681824310SBarry Smith @*/
2177087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
21881824310SBarry Smith {
21981824310SBarry Smith   PetscErrorCode ierr;
22081824310SBarry Smith 
22181824310SBarry Smith   PetscFunctionBegin;
22281824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
22381824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2245a11e1b2SBarry Smith   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
225d28bb7d2SJed Brown   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
22681824310SBarry Smith   PetscFunctionReturn(0);
22781824310SBarry Smith }
22881824310SBarry Smith 
2298cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
23081824310SBarry Smith {
23181824310SBarry Smith   PetscErrorCode ierr;
23281824310SBarry Smith 
23381824310SBarry Smith   PetscFunctionBegin;
23481824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
235511c6705SHong Zhang   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
23681824310SBarry Smith   PetscFunctionReturn(0);
23781824310SBarry Smith }
238b2573a8aSBarry Smith 
239