1 #include <petscconf.h> 2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/ 3 4 PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[]) 5 { 6 Mat_MPISELL *b = (Mat_MPISELL *)B->data; 7 8 PetscFunctionBegin; 9 PetscCall(PetscLayoutSetUp(B->rmap)); 10 PetscCall(PetscLayoutSetUp(B->cmap)); 11 12 if (!B->preallocated) { 13 /* Explicitly create 2 MATSEQSELLCUDA matrices. */ 14 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 15 PetscCall(MatBindToCPU(b->A, B->boundtocpu)); 16 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 17 PetscCall(MatSetType(b->A, MATSEQSELLCUDA)); 18 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 19 PetscCall(MatBindToCPU(b->B, B->boundtocpu)); 20 PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N)); 21 PetscCall(MatSetType(b->B, MATSEQSELLCUDA)); 22 } 23 PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen)); 24 PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen)); 25 B->preallocated = PETSC_TRUE; 26 B->was_assembled = PETSC_FALSE; 27 B->assembled = PETSC_FALSE; 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 PetscErrorCode MatMult_MPISELLCUDA(Mat A, Vec xx, Vec yy) 32 { 33 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 34 PetscInt nt; 35 36 PetscFunctionBegin; 37 PetscCall(VecGetLocalSize(xx, &nt)); 38 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 39 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 40 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 41 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 42 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 PetscErrorCode MatMultAdd_MPISELLCUDA(Mat A, Vec xx, Vec yy, Vec zz) 47 { 48 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 49 PetscInt nt; 50 51 PetscFunctionBegin; 52 PetscCall(VecGetLocalSize(xx, &nt)); 53 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 54 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 55 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 56 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 57 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 58 PetscFunctionReturn(PETSC_SUCCESS); 59 } 60 61 PetscErrorCode MatMultTranspose_MPISELLCUDA(Mat A, Vec xx, Vec yy) 62 { 63 Mat_MPISELL *a = (Mat_MPISELL *)A->data; 64 PetscInt nt; 65 66 PetscFunctionBegin; 67 PetscCall(VecGetLocalSize(xx, &nt)); 68 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->rmap->n, nt); 69 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 70 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 71 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 72 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 PetscErrorCode MatSetFromOptions_MPISELLCUDA(PetscOptionItems *PetscOptionsObject, Mat A) 77 { 78 PetscFunctionBegin; 79 PetscOptionsHeadBegin(PetscOptionsObject, "MPISELLCUDA options"); 80 if (A->factortype == MAT_FACTOR_NONE) { } 81 PetscOptionsHeadEnd(); 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode) 86 { 87 Mat_MPISELL *mpisell; 88 89 PetscFunctionBegin; 90 mpisell = (Mat_MPISELL *)A->data; 91 PetscCall(MatAssemblyEnd_MPISELL(A, mode)); 92 if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(VecSetType(mpisell->lvec, VECSEQCUDA)); } 93 PetscFunctionReturn(PETSC_SUCCESS); 94 } 95 96 PetscErrorCode MatDestroy_MPISELLCUDA(Mat A) 97 { 98 PetscFunctionBegin; 99 PetscCall(MatDestroy_MPISELL(A)); 100 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", NULL)); 101 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL)); 102 PetscFunctionReturn(PETSC_SUCCESS); 103 } 104 105 PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A) 106 { 107 PetscFunctionBegin; 108 PetscCall(MatCreate_MPISELL(A)); 109 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA)); 110 PetscCall(PetscFree(A->defaultvectype)); 111 PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype)); 112 113 A->ops->assemblyend = MatAssemblyEnd_MPISELLCUDA; 114 A->ops->mult = MatMult_MPISELLCUDA; 115 A->ops->multadd = MatMultAdd_MPISELLCUDA; 116 A->ops->multtranspose = MatMultTranspose_MPISELLCUDA; 117 A->ops->destroy = MatDestroy_MPISELLCUDA; 118 119 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA)); 120 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", MatConvert_MPISELL_MPIAIJ)); 121 PetscFunctionReturn(PETSC_SUCCESS); 122 } 123 124 /*@ 125 MatCreateSELLCUDA - Creates a sparse matrix in SELL format. 126 This matrix will ultimately pushed down to NVIDIA GPUs. 127 128 Collective 129 130 Input Parameters: 131 + comm - MPI communicator, set to `PETSC_COMM_SELF` 132 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 133 This value should be the same as the local size used in creating the 134 y vector for the matrix-vector product y = Ax. 135 . n - This value should be the same as the local size used in creating the 136 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 137 calculated if `N` is given) For square matrices `n` is almost always `m`. 138 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 139 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 140 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 141 (same value is used for all local rows) 142 . d_nnz - array containing the number of nonzeros in the various rows of the 143 DIAGONAL portion of the local submatrix (possibly different for each row) 144 or `NULL`, if `d_nz` is used to specify the nonzero structure. 145 The size of this array is equal to the number of local rows, i.e `m`. 146 For matrices you plan to factor you must leave room for the diagonal entry and 147 put in the entry even if it is zero. 148 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 149 submatrix (same value is used for all local rows). 150 - o_nnz - array containing the number of nonzeros in the various rows of the 151 OFF-DIAGONAL portion of the local submatrix (possibly different for 152 each row) or `NULL`, if `o_nz` is used to specify the nonzero 153 structure. The size of this array is equal to the number 154 of local rows, i.e `m`. 155 156 Output Parameter: 157 . A - the matrix 158 159 Level: intermediate 160 161 Notes: 162 If `nnz` is given then `nz` is ignored 163 164 Specify the preallocated storage with either `nz` or `nnz` (not both). 165 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 166 allocation. 167 168 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA` 169 @*/ 170 PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 171 { 172 PetscMPIInt size; 173 174 PetscFunctionBegin; 175 PetscCall(MatCreate(comm, A)); 176 PetscCall(MatSetSizes(*A, m, n, M, N)); 177 PetscCallMPI(MPI_Comm_size(comm, &size)); 178 if (size > 1) { 179 PetscCall(MatSetType(*A, MATMPISELLCUDA)); 180 PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 181 } else { 182 PetscCall(MatSetType(*A, MATSEQSELLCUDA)); 183 PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz)); 184 } 185 PetscFunctionReturn(PETSC_SUCCESS); 186 } 187 188 /*MC 189 MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices. 190 191 Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs. 192 193 This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator, 194 and `MATMPISELLCUDA` otherwise. As a result, for single process communicators, 195 `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported 196 for communicators controlling multiple processes. It is recommended that you call both of 197 the above preallocation routines for simplicity. 198 199 Options Database Key: 200 . -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to MatSetFromOptions() 201 202 Level: beginner 203 204 .seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()` 205 M*/ 206