181824310SBarry Smith 281824310SBarry Smith /* 381824310SBarry Smith Defines a matrix-vector product for the MATSEQAIJCRL matrix class. 481824310SBarry Smith This class is derived from the MATSEQAIJ class and retains the 581824310SBarry Smith compressed row storage (aka Yale sparse matrix format) but augments 681824310SBarry Smith it with a column oriented storage that is more efficient for 781824310SBarry Smith matrix vector products on Vector machines. 881824310SBarry Smith 981824310SBarry Smith CRL stands for constant row length (that is the same number of columns 1081824310SBarry Smith is kept (padded with zeros) for each row of the sparse matrix. 1181824310SBarry Smith */ 12c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/crl.h> 1381824310SBarry Smith 14d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJCRL(Mat A) 15d71ae5a4SJacob Faibussowitsch { 165a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 1781824310SBarry Smith 18362febeeSStefano Zampini PetscFunctionBegin; 195a11e1b2SBarry Smith /* Free everything in the Mat_AIJCRL data structure. */ 201baa6e33SBarry Smith if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 219566063dSJacob Faibussowitsch PetscCall(PetscFree(A->spptr)); 229566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 239566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2581824310SBarry Smith } 2681824310SBarry Smith 27d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M) 28d71ae5a4SJacob Faibussowitsch { 295a11e1b2SBarry Smith SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet"); 3081824310SBarry Smith } 3181824310SBarry Smith 32d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A) 33d71ae5a4SJacob Faibussowitsch { 3481824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data; 355a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 36d0f46423SBarry Smith PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 3781824310SBarry Smith PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */ 3881824310SBarry Smith PetscInt i, j, rmax = a->rmax, *icols, *ilen = a->ilen; 39dd6ea824SBarry Smith MatScalar *aa = a->a; 40dd6ea824SBarry Smith PetscScalar *acols; 4181824310SBarry Smith 4281824310SBarry Smith PetscFunctionBegin; 435a11e1b2SBarry Smith aijcrl->nz = a->nz; 445a11e1b2SBarry Smith aijcrl->m = A->rmap->n; 455a11e1b2SBarry Smith aijcrl->rmax = rmax; 462205254eSKarl Rupp 479566063dSJacob Faibussowitsch PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 489566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols)); 495a11e1b2SBarry Smith acols = aijcrl->acols; 505a11e1b2SBarry Smith icols = aijcrl->icols; 5181824310SBarry Smith for (i = 0; i < m; i++) { 5281824310SBarry Smith for (j = 0; j < ilen[i]; j++) { 5381824310SBarry Smith acols[j * m + i] = *aa++; 5481824310SBarry Smith icols[j * m + i] = *aj++; 5581824310SBarry Smith } 5681824310SBarry Smith for (; j < rmax; j++) { /* empty column entries */ 5781824310SBarry Smith acols[j * m + i] = 0.0; 5881824310SBarry Smith icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */ 5981824310SBarry Smith } 6081824310SBarry Smith } 619566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / ((double)(rmax * m)), rmax)); 623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 6381824310SBarry Smith } 6481824310SBarry Smith 65d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode) 66d71ae5a4SJacob Faibussowitsch { 6781824310SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 6881824310SBarry Smith 69c02b24c5SBarry Smith PetscFunctionBegin; 7081824310SBarry Smith a->inode.use = PETSC_FALSE; 712205254eSKarl Rupp 729566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 733ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 7481824310SBarry Smith 7581824310SBarry Smith /* Now calculate the permutation and grouping information. */ 769566063dSJacob Faibussowitsch PetscCall(MatSeqAIJCRL_create_aijcrl(A)); 773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7881824310SBarry Smith } 7981824310SBarry Smith 80c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h> 815aeacdfdSBarry Smith 824723c594SBarry Smith /* 835a11e1b2SBarry Smith Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL 844723c594SBarry Smith - the scatter is used only in the parallel version 854723c594SBarry Smith 864723c594SBarry Smith */ 87d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy) 88d71ae5a4SJacob Faibussowitsch { 895a11e1b2SBarry Smith Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 905a11e1b2SBarry Smith PetscInt m = aijcrl->m; /* Number of rows in the matrix. */ 915a11e1b2SBarry Smith PetscInt rmax = aijcrl->rmax, *icols = aijcrl->icols; 925a11e1b2SBarry Smith PetscScalar *acols = aijcrl->acols; 93d9ca1df4SBarry Smith PetscScalar *y; 94d9ca1df4SBarry Smith const PetscScalar *x; 9581824310SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 9681824310SBarry Smith PetscInt i, j, ii; 9781824310SBarry Smith #endif 9881824310SBarry Smith 9981824310SBarry Smith #if defined(PETSC_HAVE_PRAGMA_DISJOINT) 10081824310SBarry Smith #pragma disjoint(*x, *y, *aa) 10181824310SBarry Smith #endif 10281824310SBarry Smith 10381824310SBarry Smith PetscFunctionBegin; 1045a11e1b2SBarry Smith if (aijcrl->xscat) { 1059566063dSJacob Faibussowitsch PetscCall(VecCopy(xx, aijcrl->xwork)); 106c02b24c5SBarry Smith /* get remote values needed for local part of multiply */ 1079566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD)); 1089566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD)); 1095a11e1b2SBarry Smith xx = aijcrl->xwork; 1102205254eSKarl Rupp } 111c02b24c5SBarry Smith 1129566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(xx, &x)); 1139566063dSJacob Faibussowitsch PetscCall(VecGetArray(yy, &y)); 11481824310SBarry Smith 11581824310SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL) 11681824310SBarry Smith fortranmultcrl_(&m, &rmax, x, y, icols, acols); 11781824310SBarry Smith #else 11881824310SBarry Smith 1194be65460SBarry Smith /* first column */ 1202205254eSKarl Rupp for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]]; 1214be65460SBarry Smith 1224be65460SBarry Smith /* other columns */ 123b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 12481824310SBarry Smith #pragma _CRI preferstream 12581824310SBarry Smith #endif 1264be65460SBarry Smith for (i = 1; i < rmax; i++) { 12781824310SBarry Smith ii = i * m; 128b409243cSBarry Smith #if defined(PETSC_HAVE_CRAY_VECTOR) 12981824310SBarry Smith #pragma _CRI prefervector 13081824310SBarry Smith #endif 1312205254eSKarl Rupp for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]]; 13281824310SBarry Smith } 13381824310SBarry Smith #endif 1349566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m)); 1359566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(xx, &x)); 1369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(yy, &y)); 1373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 13881824310SBarry Smith } 13981824310SBarry Smith 1405a11e1b2SBarry Smith /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a 1415a11e1b2SBarry Smith * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL() 14281824310SBarry Smith * routine, but can also be used to convert an assembled SeqAIJ matrix 1435a11e1b2SBarry Smith * into a SeqAIJCRL one. */ 144d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 145d71ae5a4SJacob Faibussowitsch { 14681824310SBarry Smith Mat B = *newmat; 1475a11e1b2SBarry Smith Mat_AIJCRL *aijcrl; 1484099cc6bSBarry Smith PetscBool sametype; 14981824310SBarry Smith 15081824310SBarry Smith PetscFunctionBegin; 15148a46eb9SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 1529566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 1533ba16761SJacob Faibussowitsch if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 15481824310SBarry Smith 1554dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&aijcrl)); 1565a11e1b2SBarry Smith B->spptr = (void *)aijcrl; 15781824310SBarry Smith 15817667f90SBarry Smith /* Set function pointers for methods that we inherit from AIJ but override. */ 1595a11e1b2SBarry Smith B->ops->duplicate = MatDuplicate_AIJCRL; 1605a11e1b2SBarry Smith B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL; 1615a11e1b2SBarry Smith B->ops->destroy = MatDestroy_SeqAIJCRL; 1625a11e1b2SBarry Smith B->ops->mult = MatMult_AIJCRL; 16381824310SBarry Smith 16481824310SBarry Smith /* If A has already been assembled, compute the permutation. */ 1651baa6e33SBarry Smith if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B)); 1669566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL)); 16781824310SBarry Smith *newmat = B; 1683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16981824310SBarry Smith } 17081824310SBarry Smith 17181824310SBarry Smith /*@C 17211a5261eSBarry Smith MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`. 17311a5261eSBarry Smith This type inherits from `MATSEQAIJ`, but stores some additional 17481824310SBarry Smith information that is used to allow better vectorization of 17511a5261eSBarry Smith the matrix-vector product. At the cost of increased storage, the `MATSEQAIJ` formatted 17681824310SBarry Smith matrix can be copied to a format in which pieces of the matrix are 17781824310SBarry Smith stored in ELLPACK format, allowing the vectorized matrix multiply 178*20f4b53cSBarry Smith routine to use stride-1 memory accesses. 17981824310SBarry Smith 180d083f849SBarry Smith Collective 18181824310SBarry Smith 18281824310SBarry Smith Input Parameters: 18311a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 18481824310SBarry Smith . m - number of rows 18581824310SBarry Smith . n - number of columns 186*20f4b53cSBarry Smith . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is given 18781824310SBarry Smith - nnz - array containing the number of nonzeros in the various rows 1882ef1f0ffSBarry Smith (possibly different for each row) or `NULL` 18981824310SBarry Smith 19081824310SBarry Smith Output Parameter: 19181824310SBarry Smith . A - the matrix 19281824310SBarry Smith 19381824310SBarry Smith Level: intermediate 19481824310SBarry Smith 1952ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()` 19681824310SBarry Smith @*/ 197d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 198d71ae5a4SJacob Faibussowitsch { 19981824310SBarry Smith PetscFunctionBegin; 2009566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 2019566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 2029566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJCRL)); 2039566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 2043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 20581824310SBarry Smith } 20681824310SBarry Smith 207d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A) 208d71ae5a4SJacob Faibussowitsch { 20981824310SBarry Smith PetscFunctionBegin; 2109566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATSEQAIJ)); 2119566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A)); 2123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 21381824310SBarry Smith } 214