xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision d083f849a86f1f43e18d534ee43954e2786cb29a)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
145a11e1b2SBarry Smith PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
1581824310SBarry Smith {
1681824310SBarry Smith   PetscErrorCode ierr;
175a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
1881824310SBarry Smith 
195a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
20bf0cc555SLisandro Dalcin   if (aijcrl) {
215a11e1b2SBarry Smith     ierr = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
22bf0cc555SLisandro Dalcin   }
23c31cb41cSBarry Smith   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2481824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
254723c594SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2681824310SBarry Smith   PetscFunctionReturn(0);
2781824310SBarry Smith }
2881824310SBarry Smith 
295a11e1b2SBarry Smith PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
3081824310SBarry Smith {
3181824310SBarry Smith   PetscFunctionBegin;
325a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot duplicate AIJCRL matrices yet");
3381824310SBarry Smith   PetscFunctionReturn(0);
3481824310SBarry Smith }
3581824310SBarry Smith 
3696b95a6bSBarry Smith PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
3781824310SBarry Smith {
3881824310SBarry Smith   Mat_SeqAIJ     *a      = (Mat_SeqAIJ*)(A)->data;
395a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
40d0f46423SBarry Smith   PetscInt       m       = A->rmap->n; /* Number of rows in the matrix. */
4181824310SBarry Smith   PetscInt       *aj     = a->j; /* From the CSR representation; points to the beginning  of each row. */
4281824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
43dd6ea824SBarry Smith   MatScalar      *aa = a->a;
44dd6ea824SBarry Smith   PetscScalar    *acols;
4581824310SBarry Smith   PetscErrorCode ierr;
4681824310SBarry Smith 
4781824310SBarry Smith   PetscFunctionBegin;
485a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
495a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
505a11e1b2SBarry Smith   aijcrl->rmax = rmax;
512205254eSKarl Rupp 
525a11e1b2SBarry Smith   ierr  = PetscFree2(aijcrl->acols,aijcrl->icols);CHKERRQ(ierr);
53dcca6d9dSJed Brown   ierr  = PetscMalloc2(rmax*m,&aijcrl->acols,rmax*m,&aijcrl->icols);CHKERRQ(ierr);
545a11e1b2SBarry Smith   acols = aijcrl->acols;
555a11e1b2SBarry Smith   icols = aijcrl->icols;
5681824310SBarry Smith   for (i=0; i<m; i++) {
5781824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
5881824310SBarry Smith       acols[j*m+i] = *aa++;
5981824310SBarry Smith       icols[j*m+i] = *aj++;
6081824310SBarry Smith     }
6181824310SBarry Smith     for (; j<rmax; j++) { /* empty column entries */
6281824310SBarry Smith       acols[j*m+i] = 0.0;
6381824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
6481824310SBarry Smith     }
6581824310SBarry Smith   }
66a2ea699eSBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %g. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);CHKERRQ(ierr);
6781824310SBarry Smith   PetscFunctionReturn(0);
6881824310SBarry Smith }
6981824310SBarry Smith 
705a11e1b2SBarry Smith PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
7181824310SBarry Smith {
7281824310SBarry Smith   PetscErrorCode ierr;
7381824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
7481824310SBarry Smith 
75c02b24c5SBarry Smith   PetscFunctionBegin;
7681824310SBarry Smith   a->inode.use = PETSC_FALSE;
772205254eSKarl Rupp 
784723c594SBarry Smith   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
794723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
8081824310SBarry Smith 
8181824310SBarry Smith   /* Now calculate the permutation and grouping information. */
8296b95a6bSBarry Smith   ierr = MatSeqAIJCRL_create_aijcrl(A);CHKERRQ(ierr);
8381824310SBarry Smith   PetscFunctionReturn(0);
8481824310SBarry Smith }
8581824310SBarry Smith 
86c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
875aeacdfdSBarry Smith 
884723c594SBarry Smith /*
895a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
904723c594SBarry Smith     - the scatter is used only in the parallel version
914723c594SBarry Smith 
924723c594SBarry Smith */
935a11e1b2SBarry Smith PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
9481824310SBarry Smith {
955a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL*) A->spptr;
965a11e1b2SBarry Smith   PetscInt          m       = aijcrl->m; /* Number of rows in the matrix. */
975a11e1b2SBarry Smith   PetscInt          rmax    = aijcrl->rmax,*icols = aijcrl->icols;
985a11e1b2SBarry Smith   PetscScalar       *acols  = aijcrl->acols;
9981824310SBarry Smith   PetscErrorCode    ierr;
100d9ca1df4SBarry Smith   PetscScalar       *y;
101d9ca1df4SBarry Smith   const PetscScalar *x;
10281824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
10381824310SBarry Smith   PetscInt          i,j,ii;
10481824310SBarry Smith #endif
10581824310SBarry Smith 
10681824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
10781824310SBarry Smith #pragma disjoint(*x,*y,*aa)
10881824310SBarry Smith #endif
10981824310SBarry Smith 
11081824310SBarry Smith   PetscFunctionBegin;
1115a11e1b2SBarry Smith   if (aijcrl->xscat) {
1125a11e1b2SBarry Smith     ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
113c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1145a11e1b2SBarry Smith     ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155a11e1b2SBarry Smith     ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165a11e1b2SBarry Smith     xx   = aijcrl->xwork;
1172205254eSKarl Rupp   }
118c02b24c5SBarry Smith 
119d9ca1df4SBarry Smith   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
12081824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
12181824310SBarry Smith 
12281824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
12381824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
12481824310SBarry Smith #else
12581824310SBarry Smith 
1264be65460SBarry Smith   /* first column */
1272205254eSKarl Rupp   for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]];
1284be65460SBarry Smith 
1294be65460SBarry Smith   /* other columns */
130b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13181824310SBarry Smith #pragma _CRI preferstream
13281824310SBarry Smith #endif
1334be65460SBarry Smith   for (i=1; i<rmax; i++) {
13481824310SBarry Smith     ii = i*m;
135b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
13681824310SBarry Smith #pragma _CRI prefervector
13781824310SBarry Smith #endif
1382205254eSKarl Rupp     for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
13981824310SBarry Smith   }
140b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
14181824310SBarry Smith #pragma _CRI ivdep
14281824310SBarry Smith #endif
14381824310SBarry Smith 
14481824310SBarry Smith #endif
1455a11e1b2SBarry Smith   ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
146d9ca1df4SBarry Smith   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
14781824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
14881824310SBarry Smith   PetscFunctionReturn(0);
14981824310SBarry Smith }
15081824310SBarry Smith 
15181824310SBarry Smith 
1525a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1535a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
15481824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1555a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
156cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
15781824310SBarry Smith {
15881824310SBarry Smith   PetscErrorCode ierr;
15981824310SBarry Smith   Mat            B = *newmat;
1605a11e1b2SBarry Smith   Mat_AIJCRL     *aijcrl;
1614099cc6bSBarry Smith   PetscBool      sametype;
16281824310SBarry Smith 
16381824310SBarry Smith   PetscFunctionBegin;
16481824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
16581824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
16681824310SBarry Smith   }
1674099cc6bSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1684099cc6bSBarry Smith   if (sametype) PetscFunctionReturn(0);
16981824310SBarry Smith 
170b00a9115SJed Brown   ierr     = PetscNewLog(B,&aijcrl);CHKERRQ(ierr);
1715a11e1b2SBarry Smith   B->spptr = (void*) aijcrl;
17281824310SBarry Smith 
17317667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1745a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1755a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1765a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1775a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
17881824310SBarry Smith 
17981824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1804eeb8337SBarry Smith   if (A->assembled) {
18196b95a6bSBarry Smith     ierr = MatSeqAIJCRL_create_aijcrl(B);CHKERRQ(ierr);
18281824310SBarry Smith   }
1835a11e1b2SBarry Smith   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJCRL);CHKERRQ(ierr);
18481824310SBarry Smith   *newmat = B;
18581824310SBarry Smith   PetscFunctionReturn(0);
18681824310SBarry Smith }
18781824310SBarry Smith 
18881824310SBarry Smith /*@C
1895a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
19081824310SBarry Smith    This type inherits from AIJ, but stores some additional
19181824310SBarry Smith    information that is used to allow better vectorization of
19281824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
19381824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
19481824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
19581824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
19681824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
19781824310SBarry Smith    performance.
19881824310SBarry Smith 
199*d083f849SBarry Smith    Collective
20081824310SBarry Smith 
20181824310SBarry Smith    Input Parameters:
20281824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
20381824310SBarry Smith .  m - number of rows
20481824310SBarry Smith .  n - number of columns
20581824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
20681824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
2070298fd71SBarry Smith          (possibly different for each row) or NULL
20881824310SBarry Smith 
20981824310SBarry Smith    Output Parameter:
21081824310SBarry Smith .  A - the matrix
21181824310SBarry Smith 
21281824310SBarry Smith    Notes:
21381824310SBarry Smith    If nnz is given then nz is ignored
21481824310SBarry Smith 
21581824310SBarry Smith    Level: intermediate
21681824310SBarry Smith 
2175a11e1b2SBarry Smith .seealso: MatCreate(), MatCreateMPIAIJPERM(), MatSetValues()
21881824310SBarry Smith @*/
2197087cfbeSBarry Smith PetscErrorCode  MatCreateSeqAIJCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
22081824310SBarry Smith {
22181824310SBarry Smith   PetscErrorCode ierr;
22281824310SBarry Smith 
22381824310SBarry Smith   PetscFunctionBegin;
22481824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
22581824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2265a11e1b2SBarry Smith   ierr = MatSetType(*A,MATSEQAIJCRL);CHKERRQ(ierr);
227d28bb7d2SJed Brown   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
22881824310SBarry Smith   PetscFunctionReturn(0);
22981824310SBarry Smith }
23081824310SBarry Smith 
2318cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
23281824310SBarry Smith {
23381824310SBarry Smith   PetscErrorCode ierr;
23481824310SBarry Smith 
23581824310SBarry Smith   PetscFunctionBegin;
23681824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
237511c6705SHong Zhang   ierr = MatConvert_SeqAIJ_SeqAIJCRL(A,MATSEQAIJCRL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
23881824310SBarry Smith   PetscFunctionReturn(0);
23981824310SBarry Smith }
240b2573a8aSBarry Smith 
24181824310SBarry Smith 
242