xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision ae15b995b5732fffd2de5a75cf61ef7190c6fef1)
181824310SBarry Smith #define PETSCMAT_DLL
281824310SBarry Smith 
381824310SBarry Smith /*
481824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
581824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
681824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
781824310SBarry Smith   it with a column oriented storage that is more efficient for
881824310SBarry Smith   matrix vector products on Vector machines.
981824310SBarry Smith 
1081824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1181824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1281824310SBarry Smith */
13c02b24c5SBarry Smith #include "src/mat/impls/aij/seq/crl/crl.h"
1481824310SBarry Smith 
1581824310SBarry Smith #undef __FUNCT__
1681824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL"
1781824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A)
1881824310SBarry Smith {
1981824310SBarry Smith   PetscErrorCode ierr;
20c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL *) A->spptr;
2181824310SBarry Smith 
2281824310SBarry Smith   /* We are going to convert A back into a SEQAIJ matrix, since we are
23c02b24c5SBarry Smith    * eventually going to use MatDestroy() to destroy everything
2481824310SBarry Smith    * that is not specific to CRL.
2581824310SBarry Smith    * In preparation for this, reset the operations pointers in A to
2681824310SBarry Smith    * their SeqAIJ versions. */
27c02b24c5SBarry Smith   A->ops->assemblyend = crl->AssemblyEnd;
28c02b24c5SBarry Smith   A->ops->destroy     = crl->MatDestroy;
29c02b24c5SBarry Smith   A->ops->duplicate   = crl->MatDuplicate;
3081824310SBarry Smith 
31c02b24c5SBarry Smith   /* Free everything in the Mat_CRL data structure. */
3281824310SBarry Smith   if (crl->icols) {
3381824310SBarry Smith     ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr);
3481824310SBarry Smith   }
35c02b24c5SBarry Smith   /* Free the Mat_CRL struct itself. */
3681824310SBarry Smith   ierr = PetscFree(crl);CHKERRQ(ierr);
3781824310SBarry Smith 
38c02b24c5SBarry Smith   /* Change the type of A back to SEQAIJ and use MatDestroy()
3981824310SBarry Smith    * to destroy everything that remains. */
4081824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
4181824310SBarry Smith   /* Note that I don't call MatSetType().  I believe this is because that
4281824310SBarry Smith    * is only to be called when *building* a matrix. */
4381824310SBarry Smith   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
4481824310SBarry Smith   PetscFunctionReturn(0);
4581824310SBarry Smith }
4681824310SBarry Smith 
47c02b24c5SBarry Smith PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M)
4881824310SBarry Smith {
4981824310SBarry Smith   PetscErrorCode ierr;
50c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL *) A->spptr;
5181824310SBarry Smith 
5281824310SBarry Smith   PetscFunctionBegin;
53c02b24c5SBarry Smith   ierr = (*crl->MatDuplicate)(A,op,M);CHKERRQ(ierr);
5481824310SBarry Smith   SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet");
5581824310SBarry Smith   PetscFunctionReturn(0);
5681824310SBarry Smith }
5781824310SBarry Smith 
5881824310SBarry Smith #undef __FUNCT__
5981824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl"
6081824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A)
6181824310SBarry Smith {
6281824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
63c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
6481824310SBarry Smith   PetscInt       m = A->m;  /* Number of rows in the matrix. */
6581824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
6681824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
6781824310SBarry Smith   PetscScalar    *aa = a->a,*acols;
6881824310SBarry Smith   PetscErrorCode ierr;
6981824310SBarry Smith 
7081824310SBarry Smith   PetscFunctionBegin;
71c02b24c5SBarry Smith   crl->nz   = a->nz;
72c02b24c5SBarry Smith   crl->m    = A->m;
73c02b24c5SBarry Smith   crl->rmax = rmax;
7481824310SBarry Smith   ierr  = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr);
7581824310SBarry Smith   acols = crl->acols;
7681824310SBarry Smith   icols = crl->icols;
7781824310SBarry Smith   for (i=0; i<m; i++) {
7881824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
7981824310SBarry Smith       acols[j*m+i] = *aa++;
8081824310SBarry Smith       icols[j*m+i] = *aj++;
8181824310SBarry Smith     }
8281824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
8381824310SBarry Smith       acols[j*m+i] = 0.0;
8481824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
8581824310SBarry Smith     }
8681824310SBarry Smith   }
87*ae15b995SBarry Smith   ierr = PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
8881824310SBarry Smith   PetscFunctionReturn(0);
8981824310SBarry Smith }
9081824310SBarry Smith 
9181824310SBarry Smith #undef __FUNCT__
9281824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL"
9381824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode)
9481824310SBarry Smith {
9581824310SBarry Smith   PetscErrorCode ierr;
96c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
9781824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
9881824310SBarry Smith 
99c02b24c5SBarry Smith   PetscFunctionBegin;
10081824310SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
10181824310SBarry Smith 
10281824310SBarry Smith   /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some
10381824310SBarry Smith    * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
10481824310SBarry Smith    * I'm not sure if this is the best way to do this, but it avoids
10581824310SBarry Smith    * a lot of code duplication.
10681824310SBarry Smith    * I also note that currently MATSEQCRL doesn't know anything about
10781824310SBarry Smith    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
10881824310SBarry Smith    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
10981824310SBarry Smith    * this, this may break things.  (Don't know... haven't looked at it.) */
11081824310SBarry Smith   a->inode.use = PETSC_FALSE;
111c02b24c5SBarry Smith   (*crl->AssemblyEnd)(A, mode);
11281824310SBarry Smith 
11381824310SBarry Smith   /* Now calculate the permutation and grouping information. */
11481824310SBarry Smith   ierr = SeqCRL_create_crl(A);CHKERRQ(ierr);
11581824310SBarry Smith   PetscFunctionReturn(0);
11681824310SBarry Smith }
11781824310SBarry Smith 
1185aeacdfdSBarry Smith #include "src/inline/dot.h"
1195aeacdfdSBarry Smith 
12081824310SBarry Smith #undef __FUNCT__
121c02b24c5SBarry Smith #define __FUNCT__ "MatMult_CRL"
122c02b24c5SBarry Smith PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy)
12381824310SBarry Smith {
124c02b24c5SBarry Smith   Mat_CRL        *crl = (Mat_CRL*) A->spptr;
125c02b24c5SBarry Smith   PetscInt       m = crl->m;  /* Number of rows in the matrix. */
126c02b24c5SBarry Smith   PetscInt       rmax = crl->rmax,*icols = crl->icols;
12781824310SBarry Smith   PetscScalar    *acols = crl->acols;
12881824310SBarry Smith   PetscErrorCode ierr;
12981824310SBarry Smith   PetscScalar    *x,*y;
13081824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
13181824310SBarry Smith   PetscInt       i,j,ii;
13281824310SBarry Smith #endif
13381824310SBarry Smith 
13481824310SBarry Smith 
13581824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
13681824310SBarry Smith #pragma disjoint(*x,*y,*aa)
13781824310SBarry Smith #endif
13881824310SBarry Smith 
13981824310SBarry Smith   PetscFunctionBegin;
140c02b24c5SBarry Smith   if (crl->xscat) {
141c02b24c5SBarry Smith     ierr = VecCopy(xx,crl->xwork);CHKERRQ(ierr);
142c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
143c02b24c5SBarry Smith     ierr = VecScatterBegin(xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD,crl->xscat);CHKERRQ(ierr);
14411285404SBarry Smith     ierr = VecScatterEnd(xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD,crl->xscat);CHKERRQ(ierr);
145c02b24c5SBarry Smith     xx = crl->xwork;
146c02b24c5SBarry Smith   };
147c02b24c5SBarry Smith 
14881824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
14981824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15081824310SBarry Smith 
15181824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
15281824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
15381824310SBarry Smith #else
15481824310SBarry Smith 
1554be65460SBarry Smith   /* first column */
1564be65460SBarry Smith   for (j=0; j<m; j++) {
1574be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1584be65460SBarry Smith   }
1594be65460SBarry Smith 
1604be65460SBarry Smith   /* other columns */
16181824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
16281824310SBarry Smith #pragma _CRI preferstream
16381824310SBarry Smith #endif
1644be65460SBarry Smith   for (i=1; i<rmax; i++) {
16581824310SBarry Smith     ii = i*m;
16681824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
16781824310SBarry Smith #pragma _CRI prefervector
16881824310SBarry Smith #endif
16981824310SBarry Smith     for (j=0; j<m; j++) {
17081824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
17181824310SBarry Smith     }
17281824310SBarry Smith   }
17381824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
17481824310SBarry Smith #pragma _CRI ivdep
17581824310SBarry Smith #endif
17681824310SBarry Smith 
17781824310SBarry Smith #endif
178483e0693SBarry Smith   ierr = PetscLogFlops(2*crl->nz - m);CHKERRQ(ierr);
17981824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
18081824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
18181824310SBarry Smith   PetscFunctionReturn(0);
18281824310SBarry Smith }
18381824310SBarry Smith 
18481824310SBarry Smith 
18581824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a
18681824310SBarry Smith  * SeqCRL matrix.  This routine is called by the MatCreate_SeqCRL()
18781824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
18881824310SBarry Smith  * into a SeqCRL one. */
18981824310SBarry Smith EXTERN_C_BEGIN
19081824310SBarry Smith #undef __FUNCT__
19181824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL"
19281824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
19381824310SBarry Smith {
19481824310SBarry Smith   /* This routine is only called to convert to MATSEQCRL
19581824310SBarry Smith    * from MATSEQAIJ, so we can ignore 'MatType Type'. */
19681824310SBarry Smith   PetscErrorCode ierr;
19781824310SBarry Smith   Mat            B = *newmat;
198c02b24c5SBarry Smith   Mat_CRL        *crl;
19981824310SBarry Smith 
20081824310SBarry Smith   PetscFunctionBegin;
20181824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
20281824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
20381824310SBarry Smith   }
20481824310SBarry Smith 
205c02b24c5SBarry Smith   ierr = PetscNew(Mat_CRL,&crl);CHKERRQ(ierr);
20681824310SBarry Smith   B->spptr = (void *) crl;
20781824310SBarry Smith 
20881824310SBarry Smith   /* Save a pointer to the original SeqAIJ assembly end routine, because we
20981824310SBarry Smith    * will want to use it later in the CRL assembly end routine.
21081824310SBarry Smith    * Also, save a pointer to the original SeqAIJ Destroy routine, because we
21181824310SBarry Smith    * will want to use it in the CRL destroy routine. */
212c02b24c5SBarry Smith   crl->AssemblyEnd  = A->ops->assemblyend;
213c02b24c5SBarry Smith   crl->MatDestroy   = A->ops->destroy;
214c02b24c5SBarry Smith   crl->MatDuplicate = A->ops->duplicate;
21581824310SBarry Smith 
21681824310SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but
21781824310SBarry Smith    * override. */
218c02b24c5SBarry Smith   B->ops->duplicate   = MatDuplicate_CRL;
21981824310SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqCRL;
22081824310SBarry Smith   B->ops->destroy     = MatDestroy_SeqCRL;
221c02b24c5SBarry Smith   B->ops->mult        = MatMult_CRL;
22281824310SBarry Smith 
22381824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
22481824310SBarry Smith   if (A->assembled == PETSC_TRUE) {
22581824310SBarry Smith     ierr = SeqCRL_create_crl(B);CHKERRQ(ierr);
22681824310SBarry Smith   }
22781824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr);
22881824310SBarry Smith   *newmat = B;
22981824310SBarry Smith   PetscFunctionReturn(0);
23081824310SBarry Smith }
23181824310SBarry Smith EXTERN_C_END
23281824310SBarry Smith 
23381824310SBarry Smith 
23481824310SBarry Smith #undef __FUNCT__
23581824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL"
23681824310SBarry Smith /*@C
23781824310SBarry Smith    MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL.
23881824310SBarry Smith    This type inherits from AIJ, but stores some additional
23981824310SBarry Smith    information that is used to allow better vectorization of
24081824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
24181824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
24281824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
24381824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
24481824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
24581824310SBarry Smith    performance.
24681824310SBarry Smith 
24781824310SBarry Smith    Collective on MPI_Comm
24881824310SBarry Smith 
24981824310SBarry Smith    Input Parameters:
25081824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
25181824310SBarry Smith .  m - number of rows
25281824310SBarry Smith .  n - number of columns
25381824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
25481824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
25581824310SBarry Smith          (possibly different for each row) or PETSC_NULL
25681824310SBarry Smith 
25781824310SBarry Smith    Output Parameter:
25881824310SBarry Smith .  A - the matrix
25981824310SBarry Smith 
26081824310SBarry Smith    Notes:
26181824310SBarry Smith    If nnz is given then nz is ignored
26281824310SBarry Smith 
26381824310SBarry Smith    Level: intermediate
26481824310SBarry Smith 
26581824310SBarry Smith .keywords: matrix, cray, sparse, parallel
26681824310SBarry Smith 
26781824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
26881824310SBarry Smith @*/
26981824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
27081824310SBarry Smith {
27181824310SBarry Smith   PetscErrorCode ierr;
27281824310SBarry Smith 
27381824310SBarry Smith   PetscFunctionBegin;
27481824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
27581824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
27681824310SBarry Smith   ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr);
27781824310SBarry Smith   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
27881824310SBarry Smith   PetscFunctionReturn(0);
27981824310SBarry Smith }
28081824310SBarry Smith 
28181824310SBarry Smith 
28281824310SBarry Smith EXTERN_C_BEGIN
28381824310SBarry Smith #undef __FUNCT__
28481824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL"
28581824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A)
28681824310SBarry Smith {
28781824310SBarry Smith   PetscErrorCode ierr;
28881824310SBarry Smith 
28981824310SBarry Smith   PetscFunctionBegin;
29081824310SBarry Smith   /* Change the type name before calling MatSetType() to force proper construction of SeqAIJ
29181824310SBarry Smith      and MATSEQCRL types. */
29281824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQCRL);CHKERRQ(ierr);
29381824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
29481824310SBarry Smith   ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
29581824310SBarry Smith   PetscFunctionReturn(0);
29681824310SBarry Smith }
29781824310SBarry Smith EXTERN_C_END
29881824310SBarry Smith 
299