xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 5aeacdfd97e55118231b31a928bcc6a301462628)
181824310SBarry Smith #define PETSCMAT_DLL
281824310SBarry Smith 
381824310SBarry Smith /*
481824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
581824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
681824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
781824310SBarry Smith   it with a column oriented storage that is more efficient for
881824310SBarry Smith   matrix vector products on Vector machines.
981824310SBarry Smith 
1081824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1181824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1281824310SBarry Smith */
1381824310SBarry Smith 
1481824310SBarry Smith #include "src/mat/impls/aij/seq/aij.h"
1581824310SBarry Smith 
1681824310SBarry Smith typedef struct {
1781824310SBarry Smith   PetscInt    ncols;    /* number of columns in each row */
1881824310SBarry Smith   PetscInt    *icols;   /* columns of nonzeros, stored one column at a time */
1981824310SBarry Smith   PetscScalar *acols;   /* values of nonzeros, stored as icols */
2081824310SBarry Smith 
2181824310SBarry Smith   /* We need to keep a pointer to MatAssemblyEnd_SeqAIJ because we
2281824310SBarry Smith    * actually want to call this function from within the
2381824310SBarry Smith    * MatAssemblyEnd_SeqCRL function.  Similarly, we also need
2481824310SBarry Smith    * MatDestroy_SeqAIJ and MatDuplicate_SeqAIJ. */
2581824310SBarry Smith   PetscErrorCode (*AssemblyEnd_SeqAIJ)(Mat,MatAssemblyType);
2681824310SBarry Smith   PetscErrorCode (*MatDestroy_SeqAIJ)(Mat);
2781824310SBarry Smith   PetscErrorCode (*MatDuplicate_SeqAIJ)(Mat,MatDuplicateOption,Mat*);
2881824310SBarry Smith } Mat_SeqCRL;
2981824310SBarry Smith 
3081824310SBarry Smith #undef __FUNCT__
3181824310SBarry Smith #define __FUNCT__ "MatDestroy_SeqCRL"
3281824310SBarry Smith PetscErrorCode MatDestroy_SeqCRL(Mat A)
3381824310SBarry Smith {
3481824310SBarry Smith   PetscErrorCode ierr;
3581824310SBarry Smith   Mat_SeqCRL     *crl = (Mat_SeqCRL *) A->spptr;
3681824310SBarry Smith 
3781824310SBarry Smith   /* We are going to convert A back into a SEQAIJ matrix, since we are
3881824310SBarry Smith    * eventually going to use MatDestroy_SeqAIJ() to destroy everything
3981824310SBarry Smith    * that is not specific to CRL.
4081824310SBarry Smith    * In preparation for this, reset the operations pointers in A to
4181824310SBarry Smith    * their SeqAIJ versions. */
4281824310SBarry Smith   A->ops->assemblyend = crl->AssemblyEnd_SeqAIJ;
4381824310SBarry Smith   A->ops->destroy     = crl->MatDestroy_SeqAIJ;
4481824310SBarry Smith   A->ops->duplicate   = crl->MatDuplicate_SeqAIJ;
4581824310SBarry Smith 
4681824310SBarry Smith   /* Free everything in the Mat_SeqCRL data structure. */
4781824310SBarry Smith   if (crl->icols) {
4881824310SBarry Smith     ierr = PetscFree2(crl->acols,crl->icols);CHKERRQ(ierr);
4981824310SBarry Smith   }
5081824310SBarry Smith   /* Free the Mat_SeqCRL struct itself. */
5181824310SBarry Smith   ierr = PetscFree(crl);CHKERRQ(ierr);
5281824310SBarry Smith 
5381824310SBarry Smith   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
5481824310SBarry Smith    * to destroy everything that remains. */
5581824310SBarry Smith   ierr = PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
5681824310SBarry Smith   /* Note that I don't call MatSetType().  I believe this is because that
5781824310SBarry Smith    * is only to be called when *building* a matrix. */
5881824310SBarry Smith   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
5981824310SBarry Smith   PetscFunctionReturn(0);
6081824310SBarry Smith }
6181824310SBarry Smith 
6281824310SBarry Smith PetscErrorCode MatDuplicate_SeqCRL(Mat A, MatDuplicateOption op, Mat *M)
6381824310SBarry Smith {
6481824310SBarry Smith   PetscErrorCode ierr;
6581824310SBarry Smith   Mat_SeqCRL     *crl = (Mat_SeqCRL *) A->spptr;
6681824310SBarry Smith 
6781824310SBarry Smith   PetscFunctionBegin;
6881824310SBarry Smith   ierr = (*crl->MatDuplicate_SeqAIJ)(A,op,M);CHKERRQ(ierr);
6981824310SBarry Smith   SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet");
7081824310SBarry Smith   PetscFunctionReturn(0);
7181824310SBarry Smith }
7281824310SBarry Smith 
7381824310SBarry Smith #undef __FUNCT__
7481824310SBarry Smith #define __FUNCT__ "SeqCRL_create_crl"
7581824310SBarry Smith PetscErrorCode SeqCRL_create_crl(Mat A)
7681824310SBarry Smith {
7781824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
7881824310SBarry Smith   Mat_SeqCRL     *crl = (Mat_SeqCRL*) A->spptr;
7981824310SBarry Smith   PetscInt       m = A->m;  /* Number of rows in the matrix. */
8081824310SBarry Smith   PetscInt       *aj = a->j;  /* From the CSR representation; points to the beginning  of each row. */
8181824310SBarry Smith   PetscInt       i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
8281824310SBarry Smith   PetscScalar    *aa = a->a,*acols;
8381824310SBarry Smith   PetscErrorCode ierr;
8481824310SBarry Smith 
8581824310SBarry Smith   PetscFunctionBegin;
8681824310SBarry Smith   ierr  = PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);CHKERRQ(ierr);
8781824310SBarry Smith   acols = crl->acols;
8881824310SBarry Smith   icols = crl->icols;
8981824310SBarry Smith   for (i=0; i<m; i++) {
9081824310SBarry Smith     for (j=0; j<ilen[i]; j++) {
9181824310SBarry Smith       acols[j*m+i] = *aa++;
9281824310SBarry Smith       icols[j*m+i] = *aj++;
9381824310SBarry Smith     }
9481824310SBarry Smith     for (;j<rmax; j++) { /* empty column entries */
9581824310SBarry Smith       acols[j*m+i] = 0.0;
9681824310SBarry Smith       icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0;  /* handle case where row is EMPTY */
9781824310SBarry Smith     }
9881824310SBarry Smith   }
9981824310SBarry Smith   ierr = PetscLogInfo((A,"SeqCRL_create_crl: Percentage of 0's introduced for vectorized multiply %g\n",1.0-((double)a->nz)/((double)(rmax*m))));
10081824310SBarry Smith   PetscFunctionReturn(0);
10181824310SBarry Smith }
10281824310SBarry Smith 
10381824310SBarry Smith #undef __FUNCT__
10481824310SBarry Smith #define __FUNCT__ "MatAssemblyEnd_SeqCRL"
10581824310SBarry Smith PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode)
10681824310SBarry Smith {
10781824310SBarry Smith   PetscErrorCode ierr;
10881824310SBarry Smith   Mat_SeqCRL     *crl = (Mat_SeqCRL*) A->spptr;
10981824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
11081824310SBarry Smith 
11181824310SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
11281824310SBarry Smith 
11381824310SBarry Smith   /* Since a MATSEQCRL matrix is really just a MATSEQAIJ with some
11481824310SBarry Smith    * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
11581824310SBarry Smith    * I'm not sure if this is the best way to do this, but it avoids
11681824310SBarry Smith    * a lot of code duplication.
11781824310SBarry Smith    * I also note that currently MATSEQCRL doesn't know anything about
11881824310SBarry Smith    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
11981824310SBarry Smith    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
12081824310SBarry Smith    * this, this may break things.  (Don't know... haven't looked at it.) */
12181824310SBarry Smith   a->inode.use = PETSC_FALSE;
12281824310SBarry Smith   (*crl->AssemblyEnd_SeqAIJ)(A, mode);
12381824310SBarry Smith 
12481824310SBarry Smith   /* Now calculate the permutation and grouping information. */
12581824310SBarry Smith   ierr = SeqCRL_create_crl(A);CHKERRQ(ierr);
12681824310SBarry Smith   PetscFunctionReturn(0);
12781824310SBarry Smith }
12881824310SBarry Smith 
129*5aeacdfdSBarry Smith #include "src/inline/dot.h"
130*5aeacdfdSBarry Smith 
13181824310SBarry Smith #undef __FUNCT__
13281824310SBarry Smith #define __FUNCT__ "MatMult_SeqCRL"
13381824310SBarry Smith PetscErrorCode MatMult_SeqCRL(Mat A,Vec xx,Vec yy)
13481824310SBarry Smith {
13581824310SBarry Smith   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)(A)->data;
13681824310SBarry Smith   Mat_SeqCRL     *crl = (Mat_SeqCRL*) A->spptr;
13781824310SBarry Smith   PetscInt       m = A->m;  /* Number of rows in the matrix. */
13881824310SBarry Smith   PetscInt       rmax = a->rmax,*icols = crl->icols;
13981824310SBarry Smith   PetscScalar    *acols = crl->acols;
14081824310SBarry Smith   PetscErrorCode ierr;
14181824310SBarry Smith   PetscScalar    *x,*y;
14281824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
14381824310SBarry Smith   PetscInt       i,j,ii;
14481824310SBarry Smith #endif
14581824310SBarry Smith 
14681824310SBarry Smith 
14781824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
14881824310SBarry Smith #pragma disjoint(*x,*y,*aa)
14981824310SBarry Smith #endif
15081824310SBarry Smith 
15181824310SBarry Smith   PetscFunctionBegin;
15281824310SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
15381824310SBarry Smith   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
15481824310SBarry Smith 
15581824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
15681824310SBarry Smith   fortranmultcrl_(&m,&rmax,x,y,icols,acols);
15781824310SBarry Smith #else
15881824310SBarry Smith 
1594be65460SBarry Smith   /* first column */
1604be65460SBarry Smith   for (j=0; j<m; j++) {
1614be65460SBarry Smith     y[j] = acols[j]*x[icols[j]];
1624be65460SBarry Smith   }
1634be65460SBarry Smith 
1644be65460SBarry Smith   /* other columns */
16581824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
16681824310SBarry Smith #pragma _CRI preferstream
16781824310SBarry Smith #endif
1684be65460SBarry Smith   for (i=1; i<rmax; i++) {
16981824310SBarry Smith     ii = i*m;
17081824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
17181824310SBarry Smith #pragma _CRI prefervector
17281824310SBarry Smith #endif
17381824310SBarry Smith     for (j=0; j<m; j++) {
17481824310SBarry Smith       y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
17581824310SBarry Smith     }
17681824310SBarry Smith   }
17781824310SBarry Smith #if defined(PETSC_HAVE_CRAYC)
17881824310SBarry Smith #pragma _CRI ivdep
17981824310SBarry Smith #endif
18081824310SBarry Smith 
18181824310SBarry Smith #endif
18281824310SBarry Smith   ierr = PetscLogFlops(2*a->nz - m);CHKERRQ(ierr);
18381824310SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
18481824310SBarry Smith   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
18581824310SBarry Smith   PetscFunctionReturn(0);
18681824310SBarry Smith }
18781824310SBarry Smith 
18881824310SBarry Smith 
18981824310SBarry Smith /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a
19081824310SBarry Smith  * SeqCRL matrix.  This routine is called by the MatCreate_SeqCRL()
19181824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
19281824310SBarry Smith  * into a SeqCRL one. */
19381824310SBarry Smith EXTERN_C_BEGIN
19481824310SBarry Smith #undef __FUNCT__
19581824310SBarry Smith #define __FUNCT__ "MatConvert_SeqAIJ_SeqCRL"
19681824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqAIJ_SeqCRL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
19781824310SBarry Smith {
19881824310SBarry Smith   /* This routine is only called to convert to MATSEQCRL
19981824310SBarry Smith    * from MATSEQAIJ, so we can ignore 'MatType Type'. */
20081824310SBarry Smith   PetscErrorCode ierr;
20181824310SBarry Smith   Mat            B = *newmat;
20281824310SBarry Smith   Mat_SeqCRL     *crl;
20381824310SBarry Smith 
20481824310SBarry Smith   PetscFunctionBegin;
20581824310SBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
20681824310SBarry Smith     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
20781824310SBarry Smith   }
20881824310SBarry Smith 
20981824310SBarry Smith   ierr = PetscNew(Mat_SeqCRL,&crl);CHKERRQ(ierr);
21081824310SBarry Smith   B->spptr = (void *) crl;
21181824310SBarry Smith 
21281824310SBarry Smith   /* Save a pointer to the original SeqAIJ assembly end routine, because we
21381824310SBarry Smith    * will want to use it later in the CRL assembly end routine.
21481824310SBarry Smith    * Also, save a pointer to the original SeqAIJ Destroy routine, because we
21581824310SBarry Smith    * will want to use it in the CRL destroy routine. */
21681824310SBarry Smith   crl->AssemblyEnd_SeqAIJ  = A->ops->assemblyend;
21781824310SBarry Smith   crl->MatDestroy_SeqAIJ   = A->ops->destroy;
21881824310SBarry Smith   crl->MatDuplicate_SeqAIJ = A->ops->duplicate;
21981824310SBarry Smith 
22081824310SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but
22181824310SBarry Smith    * override. */
22281824310SBarry Smith   B->ops->duplicate   = MatDuplicate_SeqCRL;
22381824310SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqCRL;
22481824310SBarry Smith   B->ops->destroy     = MatDestroy_SeqCRL;
22581824310SBarry Smith   B->ops->mult        = MatMult_SeqCRL;
22681824310SBarry Smith 
22781824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
22881824310SBarry Smith   if (A->assembled == PETSC_TRUE) {
22981824310SBarry Smith     ierr = SeqCRL_create_crl(B);CHKERRQ(ierr);
23081824310SBarry Smith   }
23181824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);CHKERRQ(ierr);
23281824310SBarry Smith   *newmat = B;
23381824310SBarry Smith   PetscFunctionReturn(0);
23481824310SBarry Smith }
23581824310SBarry Smith EXTERN_C_END
23681824310SBarry Smith 
23781824310SBarry Smith 
23881824310SBarry Smith #undef __FUNCT__
23981824310SBarry Smith #define __FUNCT__ "MatCreateSeqCRL"
24081824310SBarry Smith /*@C
24181824310SBarry Smith    MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL.
24281824310SBarry Smith    This type inherits from AIJ, but stores some additional
24381824310SBarry Smith    information that is used to allow better vectorization of
24481824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
24581824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
24681824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
24781824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
24881824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
24981824310SBarry Smith    performance.
25081824310SBarry Smith 
25181824310SBarry Smith    Collective on MPI_Comm
25281824310SBarry Smith 
25381824310SBarry Smith    Input Parameters:
25481824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
25581824310SBarry Smith .  m - number of rows
25681824310SBarry Smith .  n - number of columns
25781824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
25881824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
25981824310SBarry Smith          (possibly different for each row) or PETSC_NULL
26081824310SBarry Smith 
26181824310SBarry Smith    Output Parameter:
26281824310SBarry Smith .  A - the matrix
26381824310SBarry Smith 
26481824310SBarry Smith    Notes:
26581824310SBarry Smith    If nnz is given then nz is ignored
26681824310SBarry Smith 
26781824310SBarry Smith    Level: intermediate
26881824310SBarry Smith 
26981824310SBarry Smith .keywords: matrix, cray, sparse, parallel
27081824310SBarry Smith 
27181824310SBarry Smith .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
27281824310SBarry Smith @*/
27381824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
27481824310SBarry Smith {
27581824310SBarry Smith   PetscErrorCode ierr;
27681824310SBarry Smith 
27781824310SBarry Smith   PetscFunctionBegin;
27881824310SBarry Smith   ierr = MatCreate(comm,A);CHKERRQ(ierr);
27981824310SBarry Smith   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
28081824310SBarry Smith   ierr = MatSetType(*A,MATSEQCRL);CHKERRQ(ierr);
28181824310SBarry Smith   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
28281824310SBarry Smith   PetscFunctionReturn(0);
28381824310SBarry Smith }
28481824310SBarry Smith 
28581824310SBarry Smith 
28681824310SBarry Smith EXTERN_C_BEGIN
28781824310SBarry Smith #undef __FUNCT__
28881824310SBarry Smith #define __FUNCT__ "MatCreate_SeqCRL"
28981824310SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqCRL(Mat A)
29081824310SBarry Smith {
29181824310SBarry Smith   PetscErrorCode ierr;
29281824310SBarry Smith 
29381824310SBarry Smith   PetscFunctionBegin;
29481824310SBarry Smith   /* Change the type name before calling MatSetType() to force proper construction of SeqAIJ
29581824310SBarry Smith      and MATSEQCRL types. */
29681824310SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQCRL);CHKERRQ(ierr);
29781824310SBarry Smith   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
29881824310SBarry Smith   ierr = MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
29981824310SBarry Smith   PetscFunctionReturn(0);
30081824310SBarry Smith }
30181824310SBarry Smith EXTERN_C_END
30281824310SBarry Smith 
303