xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
149371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) {
155a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
1681824310SBarry Smith 
17362febeeSStefano Zampini   PetscFunctionBegin;
185a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
191baa6e33SBarry Smith   if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
209566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->spptr));
219566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
229566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
2381824310SBarry Smith   PetscFunctionReturn(0);
2481824310SBarry Smith }
2581824310SBarry Smith 
269371c9d4SSatish Balay PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) {
275a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
2881824310SBarry Smith }
2981824310SBarry Smith 
309371c9d4SSatish Balay PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) {
3181824310SBarry Smith   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)(A)->data;
325a11e1b2SBarry Smith   Mat_AIJCRL  *aijcrl = (Mat_AIJCRL *)A->spptr;
33d0f46423SBarry Smith   PetscInt     m      = A->rmap->n; /* Number of rows in the matrix. */
3481824310SBarry Smith   PetscInt    *aj     = a->j;       /* From the CSR representation; points to the beginning  of each row. */
3581824310SBarry Smith   PetscInt     i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
36dd6ea824SBarry Smith   MatScalar   *aa = a->a;
37dd6ea824SBarry Smith   PetscScalar *acols;
3881824310SBarry Smith 
3981824310SBarry Smith   PetscFunctionBegin;
405a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
415a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
425a11e1b2SBarry Smith   aijcrl->rmax = rmax;
432205254eSKarl Rupp 
449566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
465a11e1b2SBarry Smith   acols = aijcrl->acols;
475a11e1b2SBarry Smith   icols = aijcrl->icols;
4881824310SBarry Smith   for (i = 0; i < m; i++) {
4981824310SBarry Smith     for (j = 0; j < ilen[i]; j++) {
5081824310SBarry Smith       acols[j * m + i] = *aa++;
5181824310SBarry Smith       icols[j * m + i] = *aj++;
5281824310SBarry Smith     }
5381824310SBarry Smith     for (; j < rmax; j++) { /* empty column entries */
5481824310SBarry Smith       acols[j * m + i] = 0.0;
5581824310SBarry Smith       icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
5681824310SBarry Smith     }
5781824310SBarry Smith   }
589566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / ((double)(rmax * m)), rmax));
5981824310SBarry Smith   PetscFunctionReturn(0);
6081824310SBarry Smith }
6181824310SBarry Smith 
629371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) {
6381824310SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
6481824310SBarry Smith 
65c02b24c5SBarry Smith   PetscFunctionBegin;
6681824310SBarry Smith   a->inode.use = PETSC_FALSE;
672205254eSKarl Rupp 
689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
694723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
7081824310SBarry Smith 
7181824310SBarry Smith   /* Now calculate the permutation and grouping information. */
729566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCRL_create_aijcrl(A));
7381824310SBarry Smith   PetscFunctionReturn(0);
7481824310SBarry Smith }
7581824310SBarry Smith 
76c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
775aeacdfdSBarry Smith 
784723c594SBarry Smith /*
795a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
804723c594SBarry Smith     - the scatter is used only in the parallel version
814723c594SBarry Smith 
824723c594SBarry Smith */
839371c9d4SSatish Balay PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy) {
845a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL *)A->spptr;
855a11e1b2SBarry Smith   PetscInt           m      = aijcrl->m; /* Number of rows in the matrix. */
865a11e1b2SBarry Smith   PetscInt           rmax = aijcrl->rmax, *icols = aijcrl->icols;
875a11e1b2SBarry Smith   PetscScalar       *acols = aijcrl->acols;
88d9ca1df4SBarry Smith   PetscScalar       *y;
89d9ca1df4SBarry Smith   const PetscScalar *x;
9081824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
9181824310SBarry Smith   PetscInt i, j, ii;
9281824310SBarry Smith #endif
9381824310SBarry Smith 
9481824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
9581824310SBarry Smith #pragma disjoint(*x, *y, *aa)
9681824310SBarry Smith #endif
9781824310SBarry Smith 
9881824310SBarry Smith   PetscFunctionBegin;
995a11e1b2SBarry Smith   if (aijcrl->xscat) {
1009566063dSJacob Faibussowitsch     PetscCall(VecCopy(xx, aijcrl->xwork));
101c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1029566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1039566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1045a11e1b2SBarry Smith     xx = aijcrl->xwork;
1052205254eSKarl Rupp   }
106c02b24c5SBarry Smith 
1079566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1089566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
10981824310SBarry Smith 
11081824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11181824310SBarry Smith   fortranmultcrl_(&m, &rmax, x, y, icols, acols);
11281824310SBarry Smith #else
11381824310SBarry Smith 
1144be65460SBarry Smith   /* first column */
1152205254eSKarl Rupp   for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
1164be65460SBarry Smith 
1174be65460SBarry Smith     /* other columns */
118b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
11981824310SBarry Smith #pragma _CRI preferstream
12081824310SBarry Smith #endif
1214be65460SBarry Smith   for (i = 1; i < rmax; i++) {
12281824310SBarry Smith     ii = i * m;
123b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
12481824310SBarry Smith #pragma _CRI prefervector
12581824310SBarry Smith #endif
1262205254eSKarl Rupp     for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
12781824310SBarry Smith   }
128b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR)
12981824310SBarry Smith #pragma _CRI ivdep
13081824310SBarry Smith #endif
13181824310SBarry Smith 
13281824310SBarry Smith #endif
1339566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m));
1349566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
13681824310SBarry Smith   PetscFunctionReturn(0);
13781824310SBarry Smith }
13881824310SBarry Smith 
1395a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1405a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
14181824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1425a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
1439371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
14481824310SBarry Smith   Mat         B = *newmat;
1455a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl;
1464099cc6bSBarry Smith   PetscBool   sametype;
14781824310SBarry Smith 
14881824310SBarry Smith   PetscFunctionBegin;
149*48a46eb9SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1509566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1514099cc6bSBarry Smith   if (sametype) PetscFunctionReturn(0);
15281824310SBarry Smith 
1539566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B, &aijcrl));
1545a11e1b2SBarry Smith   B->spptr = (void *)aijcrl;
15581824310SBarry Smith 
15617667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1575a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1585a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1595a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1605a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
16181824310SBarry Smith 
16281824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1631baa6e33SBarry Smith   if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B));
1649566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL));
16581824310SBarry Smith   *newmat = B;
16681824310SBarry Smith   PetscFunctionReturn(0);
16781824310SBarry Smith }
16881824310SBarry Smith 
16981824310SBarry Smith /*@C
1705a11e1b2SBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type SEQAIJCRL.
17181824310SBarry Smith    This type inherits from AIJ, but stores some additional
17281824310SBarry Smith    information that is used to allow better vectorization of
17381824310SBarry Smith    the matrix-vector product. At the cost of increased storage, the AIJ formatted
17481824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
17581824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
17681824310SBarry Smith    routine to use stride-1 memory accesses.  As with the AIJ type, it is
17781824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
17881824310SBarry Smith    performance.
17981824310SBarry Smith 
180d083f849SBarry Smith    Collective
18181824310SBarry Smith 
18281824310SBarry Smith    Input Parameters:
18381824310SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
18481824310SBarry Smith .  m - number of rows
18581824310SBarry Smith .  n - number of columns
18681824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
18781824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
1880298fd71SBarry Smith          (possibly different for each row) or NULL
18981824310SBarry Smith 
19081824310SBarry Smith    Output Parameter:
19181824310SBarry Smith .  A - the matrix
19281824310SBarry Smith 
19381824310SBarry Smith    Notes:
19481824310SBarry Smith    If nnz is given then nz is ignored
19581824310SBarry Smith 
19681824310SBarry Smith    Level: intermediate
19781824310SBarry Smith 
198db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
19981824310SBarry Smith @*/
2009371c9d4SSatish Balay PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
20181824310SBarry Smith   PetscFunctionBegin;
2029566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
2039566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
2049566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJCRL));
2059566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
20681824310SBarry Smith   PetscFunctionReturn(0);
20781824310SBarry Smith }
20881824310SBarry Smith 
2099371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) {
21081824310SBarry Smith   PetscFunctionBegin;
2119566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
2129566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A));
21381824310SBarry Smith   PetscFunctionReturn(0);
21481824310SBarry Smith }
215