xref: /petsc/src/mat/impls/aij/seq/crl/crl.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
181824310SBarry Smith 
281824310SBarry Smith /*
381824310SBarry Smith   Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
481824310SBarry Smith   This class is derived from the MATSEQAIJ class and retains the
581824310SBarry Smith   compressed row storage (aka Yale sparse matrix format) but augments
681824310SBarry Smith   it with a column oriented storage that is more efficient for
781824310SBarry Smith   matrix vector products on Vector machines.
881824310SBarry Smith 
981824310SBarry Smith   CRL stands for constant row length (that is the same number of columns
1081824310SBarry Smith   is kept (padded with zeros) for each row of the sparse matrix.
1181824310SBarry Smith */
12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h>
1381824310SBarry Smith 
14*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
15*d71ae5a4SJacob Faibussowitsch {
165a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
1781824310SBarry Smith 
18362febeeSStefano Zampini   PetscFunctionBegin;
195a11e1b2SBarry Smith   /* Free everything in the Mat_AIJCRL data structure. */
201baa6e33SBarry Smith   if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
219566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->spptr));
229566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
239566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
2481824310SBarry Smith   PetscFunctionReturn(0);
2581824310SBarry Smith }
2681824310SBarry Smith 
27*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
28*d71ae5a4SJacob Faibussowitsch {
295a11e1b2SBarry Smith   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
3081824310SBarry Smith }
3181824310SBarry Smith 
32*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
33*d71ae5a4SJacob Faibussowitsch {
3481824310SBarry Smith   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)(A)->data;
355a11e1b2SBarry Smith   Mat_AIJCRL  *aijcrl = (Mat_AIJCRL *)A->spptr;
36d0f46423SBarry Smith   PetscInt     m      = A->rmap->n; /* Number of rows in the matrix. */
3781824310SBarry Smith   PetscInt    *aj     = a->j;       /* From the CSR representation; points to the beginning  of each row. */
3881824310SBarry Smith   PetscInt     i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
39dd6ea824SBarry Smith   MatScalar   *aa = a->a;
40dd6ea824SBarry Smith   PetscScalar *acols;
4181824310SBarry Smith 
4281824310SBarry Smith   PetscFunctionBegin;
435a11e1b2SBarry Smith   aijcrl->nz   = a->nz;
445a11e1b2SBarry Smith   aijcrl->m    = A->rmap->n;
455a11e1b2SBarry Smith   aijcrl->rmax = rmax;
462205254eSKarl Rupp 
479566063dSJacob Faibussowitsch   PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
495a11e1b2SBarry Smith   acols = aijcrl->acols;
505a11e1b2SBarry Smith   icols = aijcrl->icols;
5181824310SBarry Smith   for (i = 0; i < m; i++) {
5281824310SBarry Smith     for (j = 0; j < ilen[i]; j++) {
5381824310SBarry Smith       acols[j * m + i] = *aa++;
5481824310SBarry Smith       icols[j * m + i] = *aj++;
5581824310SBarry Smith     }
5681824310SBarry Smith     for (; j < rmax; j++) { /* empty column entries */
5781824310SBarry Smith       acols[j * m + i] = 0.0;
5881824310SBarry Smith       icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
5981824310SBarry Smith     }
6081824310SBarry Smith   }
619566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / ((double)(rmax * m)), rmax));
6281824310SBarry Smith   PetscFunctionReturn(0);
6381824310SBarry Smith }
6481824310SBarry Smith 
65*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
66*d71ae5a4SJacob Faibussowitsch {
6781824310SBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
6881824310SBarry Smith 
69c02b24c5SBarry Smith   PetscFunctionBegin;
7081824310SBarry Smith   a->inode.use = PETSC_FALSE;
712205254eSKarl Rupp 
729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
734723c594SBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
7481824310SBarry Smith 
7581824310SBarry Smith   /* Now calculate the permutation and grouping information. */
769566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCRL_create_aijcrl(A));
7781824310SBarry Smith   PetscFunctionReturn(0);
7881824310SBarry Smith }
7981824310SBarry Smith 
80c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
815aeacdfdSBarry Smith 
824723c594SBarry Smith /*
835a11e1b2SBarry Smith     Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
844723c594SBarry Smith     - the scatter is used only in the parallel version
854723c594SBarry Smith 
864723c594SBarry Smith */
87*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy)
88*d71ae5a4SJacob Faibussowitsch {
895a11e1b2SBarry Smith   Mat_AIJCRL        *aijcrl = (Mat_AIJCRL *)A->spptr;
905a11e1b2SBarry Smith   PetscInt           m      = aijcrl->m; /* Number of rows in the matrix. */
915a11e1b2SBarry Smith   PetscInt           rmax = aijcrl->rmax, *icols = aijcrl->icols;
925a11e1b2SBarry Smith   PetscScalar       *acols = aijcrl->acols;
93d9ca1df4SBarry Smith   PetscScalar       *y;
94d9ca1df4SBarry Smith   const PetscScalar *x;
9581824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
9681824310SBarry Smith   PetscInt i, j, ii;
9781824310SBarry Smith #endif
9881824310SBarry Smith 
9981824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
10081824310SBarry Smith   #pragma disjoint(*x, *y, *aa)
10181824310SBarry Smith #endif
10281824310SBarry Smith 
10381824310SBarry Smith   PetscFunctionBegin;
1045a11e1b2SBarry Smith   if (aijcrl->xscat) {
1059566063dSJacob Faibussowitsch     PetscCall(VecCopy(xx, aijcrl->xwork));
106c02b24c5SBarry Smith     /* get remote values needed for local part of multiply */
1079566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1089566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
1095a11e1b2SBarry Smith     xx = aijcrl->xwork;
1102205254eSKarl Rupp   }
111c02b24c5SBarry Smith 
1129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1139566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
11481824310SBarry Smith 
11581824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
11681824310SBarry Smith   fortranmultcrl_(&m, &rmax, x, y, icols, acols);
11781824310SBarry Smith #else
11881824310SBarry Smith 
1194be65460SBarry Smith   /* first column */
1202205254eSKarl Rupp   for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
1214be65460SBarry Smith 
1224be65460SBarry Smith     /* other columns */
123b409243cSBarry Smith   #if defined(PETSC_HAVE_CRAY_VECTOR)
12481824310SBarry Smith     #pragma _CRI preferstream
12581824310SBarry Smith   #endif
1264be65460SBarry Smith   for (i = 1; i < rmax; i++) {
12781824310SBarry Smith     ii = i * m;
128b409243cSBarry Smith   #if defined(PETSC_HAVE_CRAY_VECTOR)
12981824310SBarry Smith     #pragma _CRI prefervector
13081824310SBarry Smith   #endif
1312205254eSKarl Rupp     for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
13281824310SBarry Smith   }
133b409243cSBarry Smith   #if defined(PETSC_HAVE_CRAY_VECTOR)
13481824310SBarry Smith     #pragma _CRI ivdep
13581824310SBarry Smith   #endif
13681824310SBarry Smith 
13781824310SBarry Smith #endif
1389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m));
1399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
14181824310SBarry Smith   PetscFunctionReturn(0);
14281824310SBarry Smith }
14381824310SBarry Smith 
1445a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
1455a11e1b2SBarry Smith  * SeqAIJCRL matrix.  This routine is called by the MatCreate_SeqAIJCRL()
14681824310SBarry Smith  * routine, but can also be used to convert an assembled SeqAIJ matrix
1475a11e1b2SBarry Smith  * into a SeqAIJCRL one. */
148*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
149*d71ae5a4SJacob Faibussowitsch {
15081824310SBarry Smith   Mat         B = *newmat;
1515a11e1b2SBarry Smith   Mat_AIJCRL *aijcrl;
1524099cc6bSBarry Smith   PetscBool   sametype;
15381824310SBarry Smith 
15481824310SBarry Smith   PetscFunctionBegin;
15548a46eb9SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1569566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1574099cc6bSBarry Smith   if (sametype) PetscFunctionReturn(0);
15881824310SBarry Smith 
1594dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijcrl));
1605a11e1b2SBarry Smith   B->spptr = (void *)aijcrl;
16181824310SBarry Smith 
16217667f90SBarry Smith   /* Set function pointers for methods that we inherit from AIJ but override. */
1635a11e1b2SBarry Smith   B->ops->duplicate   = MatDuplicate_AIJCRL;
1645a11e1b2SBarry Smith   B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
1655a11e1b2SBarry Smith   B->ops->destroy     = MatDestroy_SeqAIJCRL;
1665a11e1b2SBarry Smith   B->ops->mult        = MatMult_AIJCRL;
16781824310SBarry Smith 
16881824310SBarry Smith   /* If A has already been assembled, compute the permutation. */
1691baa6e33SBarry Smith   if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B));
1709566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL));
17181824310SBarry Smith   *newmat = B;
17281824310SBarry Smith   PetscFunctionReturn(0);
17381824310SBarry Smith }
17481824310SBarry Smith 
17581824310SBarry Smith /*@C
17611a5261eSBarry Smith    MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`.
17711a5261eSBarry Smith    This type inherits from `MATSEQAIJ`, but stores some additional
17881824310SBarry Smith    information that is used to allow better vectorization of
17911a5261eSBarry Smith    the matrix-vector product. At the cost of increased storage, the `MATSEQAIJ` formatted
18081824310SBarry Smith    matrix can be copied to a format in which pieces of the matrix are
18181824310SBarry Smith    stored in ELLPACK format, allowing the vectorized matrix multiply
18211a5261eSBarry Smith    routine to use stride-1 memory accesses.  As with the `MATSEQAIJ` type, it is
18381824310SBarry Smith    important to preallocate matrix storage in order to get good assembly
18481824310SBarry Smith    performance.
18581824310SBarry Smith 
186d083f849SBarry Smith    Collective
18781824310SBarry Smith 
18881824310SBarry Smith    Input Parameters:
18911a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
19081824310SBarry Smith .  m - number of rows
19181824310SBarry Smith .  n - number of columns
19281824310SBarry Smith .  nz - number of nonzeros per row (same for all rows)
19381824310SBarry Smith -  nnz - array containing the number of nonzeros in the various rows
1940298fd71SBarry Smith          (possibly different for each row) or NULL
19581824310SBarry Smith 
19681824310SBarry Smith    Output Parameter:
19781824310SBarry Smith .  A - the matrix
19881824310SBarry Smith 
19911a5261eSBarry Smith    Note:
20081824310SBarry Smith    If nnz is given then nz is ignored
20181824310SBarry Smith 
20281824310SBarry Smith    Level: intermediate
20381824310SBarry Smith 
204db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
20581824310SBarry Smith @*/
206*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
207*d71ae5a4SJacob Faibussowitsch {
20881824310SBarry Smith   PetscFunctionBegin;
2099566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
2109566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
2119566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJCRL));
2129566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
21381824310SBarry Smith   PetscFunctionReturn(0);
21481824310SBarry Smith }
21581824310SBarry Smith 
216*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
217*d71ae5a4SJacob Faibussowitsch {
21881824310SBarry Smith   PetscFunctionBegin;
2199566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
2209566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A));
22181824310SBarry Smith   PetscFunctionReturn(0);
22281824310SBarry Smith }
223