xref: /petsc/src/mat/impls/sell/mpi/mpicuda/mpisellcuda.cu (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include <petscconf.h>
2 #include <petscdevice.h>
3 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
4 
/*
  MatMPISELLSetPreallocation_MPISELLCUDA - Preallocation hook for MATMPISELLCUDA.

  Sets up the row/column layouts and, on first use, creates the two sequential
  SELL-CUDA sub-matrices (diagonal and off-diagonal blocks), then forwards the
  per-row length information to their sequential preallocation routines.
*/
PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *sell = (Mat_MPISELL *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (!B->preallocated) {
    /* Explicitly create the diagonal block ... */
    PetscCall(MatCreate(PETSC_COMM_SELF, &sell->A));
    PetscCall(MatBindToCPU(sell->A, B->boundtocpu));
    PetscCall(MatSetSizes(sell->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(sell->A, MATSEQSELLCUDA));

    /* ... and the off-diagonal block as MATSEQSELLCUDA matrices */
    PetscCall(MatCreate(PETSC_COMM_SELF, &sell->B));
    PetscCall(MatBindToCPU(sell->B, B->boundtocpu));
    PetscCall(MatSetSizes(sell->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetType(sell->B, MATSEQSELLCUDA));
  }

  PetscCall(MatSeqSELLSetPreallocation(sell->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(sell->B, o_rlenmax, o_rlen));

  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
31 
/*
  MatMult_MPISELLCUDA - Computes yy = A*xx for the MPI SELL-CUDA format.

  The scatter of the ghost values needed by the off-diagonal block is started
  first so the diagonal-block product can overlap the communication:
  scatter begin -> diagonal mult -> scatter end -> off-diagonal mult-add.

  Uses PetscUseTypeMethod (consistent with MatMultTranspose_MPISELLCUDA below),
  which also errors cleanly if a sub-matrix does not implement the operation,
  instead of dereferencing a possibly NULL function pointer.
*/
PetscErrorCode MatMult_MPISELLCUDA(Mat A, Vec xx, Vec yy)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}
46 
/*
  MatZeroEntries_MPISELLCUDA - Zeros all entries of the matrix by zeroing both
  the diagonal and the off-diagonal sequential blocks.
*/
PetscErrorCode MatZeroEntries_MPISELLCUDA(Mat A)
{
  Mat_MPISELL *mpisell = (Mat_MPISELL *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(mpisell->A));
  PetscCall(MatZeroEntries(mpisell->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
56 
/*
  MatMultAdd_MPISELLCUDA - Computes zz = yy + A*xx for the MPI SELL-CUDA format.

  Same overlap pattern as MatMult_MPISELLCUDA: the ghost-value scatter runs
  while the diagonal block computes zz = yy + A_d*xx; the off-diagonal block
  then accumulates into zz.

  Uses PetscUseTypeMethod (consistent with MatMultTranspose_MPISELLCUDA),
  which also errors cleanly if a sub-matrix does not implement the operation,
  instead of dereferencing a possibly NULL function pointer.
*/
PetscErrorCode MatMultAdd_MPISELLCUDA(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, multadd, xx, yy, zz);
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, zz, zz);
  PetscFunctionReturn(PETSC_SUCCESS);
}
71 
/*
  MatMultTranspose_MPISELLCUDA - Computes yy = A^T*xx for the MPI SELL-CUDA format.

  Both local transpose products are independent: the diagonal block writes yy
  directly, while the off-diagonal block writes the ghost work vector, whose
  contributions are then reverse-scattered and added into yy.
*/
PetscErrorCode MatMultTranspose_MPISELLCUDA(Mat A, Vec xx, Vec yy)
{
  Mat_MPISELL *a = (Mat_MPISELL *)A->data;
  PetscInt     xlen;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &xlen));
  PetscCheck(xlen == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->rmap->n, xlen);
  PetscUseTypeMethod(a->A, multtranspose, xx, yy);
  PetscUseTypeMethod(a->B, multtranspose, xx, a->lvec);
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
86 
/*
  MatSetFromOptions_MPISELLCUDA - No CUDA-specific options exist; this stub is
  installed so the type has a setfromoptions hook.

  Uses PetscFunctionBegin/PetscFunctionReturn like every other function in this
  file so the PETSc debug call-stack bookkeeping stays balanced.
*/
PetscErrorCode MatSetFromOptions_MPISELLCUDA(Mat, PetscOptionItems *)
{
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
}
91 
/*
  MatAssemblyEnd_MPISELLCUDA - Completes assembly via the base MPISELL routine,
  then, on the very first final assembly, retypes the ghost work vector created
  during assembly to a CUDA vector.
*/
PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
{
  Mat_MPISELL *mpisell;

  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_MPISELL(A, mode));
  mpisell = (Mat_MPISELL *)A->data;
  if (mode == MAT_FINAL_ASSEMBLY && !A->was_assembled) PetscCall(VecSetType(mpisell->lvec, VECSEQCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
99 
/*
  MatDestroy_MPISELLCUDA - Destroy hook for MATMPISELLCUDA.

  Delegates to the base MPISELL destroy and then removes the composed
  type-specific preallocation function.
*/
PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  /* Frees the MPISELL implementation data (sub-matrices, scatter, work vector) */
  PetscCall(MatDestroy_MPISELL(A));
  /* The PetscObject header is still valid after the base destroy, so the
     composed function can be cleared here */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
107 
/*
  MatConvert_MPISELL_MPISELLCUDA - Converts a MATMPISELL matrix to MATMPISELLCUDA
  by retyping its sequential sub-matrices and ghost work vector to their CUDA
  variants and installing the CUDA-aware operations.

  Input Parameters:
+ B     - the source MATMPISELL matrix
- reuse - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX

  Output Parameter:
. newmat - the converted matrix (aliases B when reuse is MAT_INPLACE_MATRIX)
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLCUDA(Mat B, MatType, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a;
  Mat          A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  /* For MAT_INPLACE_MATRIX neither branch runs: *newmat already is B */
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  /* Vectors created for this matrix (e.g. by MatCreateVecs) default to CUDA */
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));

  a = (Mat_MPISELL *)A->data;
  /* Each of these may be NULL if the matrix is not yet preallocated/assembled */
  if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLCUDA));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLCUDA));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQCUDA));

  /* Install the CUDA-specific implementations defined above */
  A->ops->assemblyend    = MatAssemblyEnd_MPISELLCUDA;
  A->ops->mult           = MatMult_MPISELLCUDA;
  A->ops->multadd        = MatMultAdd_MPISELLCUDA;
  A->ops->multtranspose  = MatMultTranspose_MPISELLCUDA;
  A->ops->setfromoptions = MatSetFromOptions_MPISELLCUDA;
  A->ops->destroy        = MatDestroy_MPISELLCUDA;
  A->ops->zeroentries    = MatZeroEntries_MPISELLCUDA;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
139 
/*
  MatCreate_MPISELLCUDA - Registered constructor for MATMPISELLCUDA: creates a
  plain MPISELL matrix and converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLCUDA(A, MATMPISELLCUDA, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
148 
149 /*@
150    MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
   This matrix will ultimately be pushed down to NVIDIA GPUs.
152 
153    Collective
154 
155    Input Parameters:
+  comm - MPI communicator
157 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
158            This value should be the same as the local size used in creating the
159            y vector for the matrix-vector product y = Ax.
160 .  n - This value should be the same as the local size used in creating the
161        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
162        calculated if `N` is given) For square matrices `n` is almost always `m`.
163 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
164 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
165 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
166            (same value is used for all local rows)
167 .  d_nnz - array containing the number of nonzeros in the various rows of the
168            DIAGONAL portion of the local submatrix (possibly different for each row)
169            or `NULL`, if `d_nz` is used to specify the nonzero structure.
170            The size of this array is equal to the number of local rows, i.e `m`.
171            For matrices you plan to factor you must leave room for the diagonal entry and
172            put in the entry even if it is zero.
173 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
174            submatrix (same value is used for all local rows).
175 -  o_nnz - array containing the number of nonzeros in the various rows of the
176            OFF-DIAGONAL portion of the local submatrix (possibly different for
177            each row) or `NULL`, if `o_nz` is used to specify the nonzero
178            structure. The size of this array is equal to the number
179            of local rows, i.e `m`.
180 
181    Output Parameter:
182 .  A - the matrix
183 
184    Level: intermediate
185 
186    Notes:
187    If `nnz` is given then `nz` is ignored
188 
189    Specify the preallocated storage with either `nz` or `nnz` (not both).
190    Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
191    allocation.
192 
193 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
194 @*/
PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt nproc;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &nproc));
  /* Pick the sequential or parallel CUDA type based on the communicator size */
  if (nproc == 1) {
    PetscCall(MatSetType(*A, MATSEQSELLCUDA));
    PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
  } else {
    PetscCall(MatSetType(*A, MATMPISELLCUDA));
    PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
212 
213 /*MC
214    MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.
215 
216    Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.
217 
218    This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
219    and `MATMPISELLCUDA` otherwise.  As a result, for single process communicators,
220    `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
221    for communicators controlling multiple processes.  It is recommended that you call both of
222    the above preallocation routines for simplicity.
223 
224    Options Database Key:
225 .  -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to MatSetFromOptions()
226 
227   Level: beginner
228 
.seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
230 M*/
231