1 2 /* 3 Defines a matrix-vector product for the MATMPIAIJCRL matrix class. 4 This class is derived from the MATMPIAIJ class and retains the 5 compressed row storage (aka Yale sparse matrix format) but augments 6 it with a column oriented storage that is more efficient for 7 matrix vector products on Vector machines. 8 9 CRL stands for constant row length (that is the same number of columns 10 is kept (padded with zeros) for each row of the sparse matrix. 11 12 See src/mat/impls/aij/seq/crl/crl.c for the sequential version 13 */ 14 15 #include <../src/mat/impls/aij/mpi/mpiaij.h> 16 #include <../src/mat/impls/aij/seq/crl/crl.h> 17 18 PetscErrorCode MatDestroy_MPIAIJCRL(Mat A) { 19 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 20 21 PetscFunctionBegin; 22 if (aijcrl) { 23 PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 24 PetscCall(VecDestroy(&aijcrl->fwork)); 25 PetscCall(VecDestroy(&aijcrl->xwork)); 26 PetscCall(PetscFree(aijcrl->array)); 27 } 28 PetscCall(PetscFree(A->spptr)); 29 30 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJ)); 31 PetscCall(MatDestroy_MPIAIJ(A)); 32 PetscFunctionReturn(0); 33 } 34 35 PetscErrorCode MatMPIAIJCRL_create_aijcrl(Mat A) { 36 Mat_MPIAIJ *a = (Mat_MPIAIJ *)(A)->data; 37 Mat_SeqAIJ *Aij = (Mat_SeqAIJ *)(a->A->data), *Bij = (Mat_SeqAIJ *)(a->B->data); 38 Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr; 39 PetscInt m = A->rmap->n; /* Number of rows in the matrix. */ 40 PetscInt nd = a->A->cmap->n; /* number of columns in diagonal portion */ 41 PetscInt *aj = Aij->j, *bj = Bij->j; /* From the CSR representation; points to the beginning of each row. */ 42 PetscInt i, j, rmax = 0, *icols, *ailen = Aij->ilen, *bilen = Bij->ilen; 43 PetscScalar *aa = Aij->a, *ba = Bij->a, *acols, *array; 44 45 PetscFunctionBegin; 46 /* determine the row with the most columns */ 47 for (i = 0; i < m; i++) rmax = PetscMax(rmax, ailen[i] + bilen[i]); 48 aijcrl->nz = Aij->nz + Bij->nz; 49 aijcrl->m = A->rmap->n; 50 aijcrl->rmax = rmax; 51 52 PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols)); 53 PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols)); 54 acols = aijcrl->acols; 55 icols = aijcrl->icols; 56 for (i = 0; i < m; i++) { 57 for (j = 0; j < ailen[i]; j++) { 58 acols[j * m + i] = *aa++; 59 icols[j * m + i] = *aj++; 60 } 61 for (; j < ailen[i] + bilen[i]; j++) { 62 acols[j * m + i] = *ba++; 63 icols[j * m + i] = nd + *bj++; 64 } 65 for (; j < rmax; j++) { /* empty column entries */ 66 acols[j * m + i] = 0.0; 67 icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */ 68 } 69 } 70 PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g\n", 1.0 - ((double)(aijcrl->nz)) / ((double)(rmax * m)))); 71 72 PetscCall(PetscFree(aijcrl->array)); 73 PetscCall(PetscMalloc1(a->B->cmap->n + nd, &array)); 74 /* xwork array is actually B->n+nd long, but we define xwork this length so can copy into it */ 75 PetscCall(VecDestroy(&aijcrl->xwork)); 76 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), 1, nd, PETSC_DECIDE, array, &aijcrl->xwork)); 77 PetscCall(VecDestroy(&aijcrl->fwork)); 78 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, a->B->cmap->n, array + nd, &aijcrl->fwork)); 79 80 aijcrl->array = array; 81 aijcrl->xscat = a->Mvctx; 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatAssemblyEnd_MPIAIJCRL(Mat A, MatAssemblyType mode) { 86 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 87 Mat_SeqAIJ *Aij = (Mat_SeqAIJ *)(a->A->data), *Bij = (Mat_SeqAIJ *)(a->A->data); 88 89 PetscFunctionBegin; 90 Aij->inode.use = PETSC_FALSE; 91 Bij->inode.use = PETSC_FALSE; 92 93 PetscCall(MatAssemblyEnd_MPIAIJ(A, mode)); 94 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 95 96 /* Now calculate the permutation and grouping information. */ 97 PetscCall(MatMPIAIJCRL_create_aijcrl(A)); 98 PetscFunctionReturn(0); 99 } 100 101 extern PetscErrorCode MatMult_AIJCRL(Mat, Vec, Vec); 102 extern PetscErrorCode MatDuplicate_AIJCRL(Mat, MatDuplicateOption, Mat *); 103 104 /* MatConvert_MPIAIJ_MPIAIJCRL converts a MPIAIJ matrix into a 105 * MPIAIJCRL matrix. This routine is called by the MatCreate_MPIAIJCRL() 106 * routine, but can also be used to convert an assembled MPIAIJ matrix 107 * into a MPIAIJCRL one. */ 108 109 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 110 Mat B = *newmat; 111 Mat_AIJCRL *aijcrl; 112 113 PetscFunctionBegin; 114 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 115 116 PetscCall(PetscNewLog(B, &aijcrl)); 117 B->spptr = (void *)aijcrl; 118 119 /* Set function pointers for methods that we inherit from AIJ but override. */ 120 B->ops->duplicate = MatDuplicate_AIJCRL; 121 B->ops->assemblyend = MatAssemblyEnd_MPIAIJCRL; 122 B->ops->destroy = MatDestroy_MPIAIJCRL; 123 B->ops->mult = MatMult_AIJCRL; 124 125 /* If A has already been assembled, compute the permutation. */ 126 if (A->assembled) PetscCall(MatMPIAIJCRL_create_aijcrl(B)); 127 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJCRL)); 128 *newmat = B; 129 PetscFunctionReturn(0); 130 } 131 132 /*@C 133 MatCreateMPIAIJCRL - Creates a sparse matrix of type `MATMPIAIJCRL`. 134 This type inherits from `MATAIJ`, but stores some additional 135 information that is used to allow better vectorization of 136 the matrix-vector product. At the cost of increased storage, the AIJ formatted 137 matrix can be copied to a format in which pieces of the matrix are 138 stored in ELLPACK format, allowing the vectorized matrix multiply 139 routine to use stride-1 memory accesses. As with the AIJ type, it is 140 important to preallocate matrix storage in order to get good assembly 141 performance. 142 143 Collective 144 145 Input Parameters: 146 + comm - MPI communicator, set to `PETSC_COMM_SELF` 147 . m - number of rows 148 . n - number of columns 149 . nz - number of nonzeros per row (same for all rows) 150 - nnz - array containing the number of nonzeros in the various rows 151 (possibly different for each row) or NULL 152 153 Output Parameter: 154 . A - the matrix 155 156 Note: 157 If nnz is given then nz is ignored 158 159 Level: intermediate 160 161 .seealso: `MATAIJ`, `MATAIJSELL`, `MATAIJPERM`, `MATAIJMKL`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()` 162 @*/ 163 PetscErrorCode MatCreateMPIAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], PetscInt onz, const PetscInt onnz[], Mat *A) { 164 PetscFunctionBegin; 165 PetscCall(MatCreate(comm, A)); 166 PetscCall(MatSetSizes(*A, m, n, m, n)); 167 PetscCall(MatSetType(*A, MATMPIAIJCRL)); 168 PetscCall(MatMPIAIJSetPreallocation_MPIAIJ(*A, nz, (PetscInt *)nnz, onz, (PetscInt *)onnz)); 169 PetscFunctionReturn(0); 170 } 171 172 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJCRL(Mat A) { 173 PetscFunctionBegin; 174 PetscCall(MatSetType(A, MATMPIAIJ)); 175 PetscCall(MatConvert_MPIAIJ_MPIAIJCRL(A, MATMPIAIJCRL, MAT_INPLACE_MATRIX, &A)); 176 PetscFunctionReturn(0); 177 } 178