xref: /petsc/src/mat/impls/sell/mpi/mpicuda/mpisellcuda.cu (revision 2d1451d43b73a0495cd81c074cbc1e0206888947)
1 #include <petscconf.h>
2 #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
3 
4 PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
5 {
6   Mat_MPISELL *b = (Mat_MPISELL *)B->data;
7 
8   PetscFunctionBegin;
9   PetscCall(PetscLayoutSetUp(B->rmap));
10   PetscCall(PetscLayoutSetUp(B->cmap));
11 
12   if (!B->preallocated) {
13     /* Explicitly create 2 MATSEQSELLCUDA matrices. */
14     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
15     PetscCall(MatBindToCPU(b->A, B->boundtocpu));
16     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
17     PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
18     PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19     PetscCall(MatBindToCPU(b->B, B->boundtocpu));
20     PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
21     PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
22   }
23   PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
24   PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
25   B->preallocated  = PETSC_TRUE;
26   B->was_assembled = PETSC_FALSE;
27   B->assembled     = PETSC_FALSE;
28   PetscFunctionReturn(PETSC_SUCCESS);
29 }
30 
31 PetscErrorCode MatMult_MPISELLCUDA(Mat A, Vec xx, Vec yy)
32 {
33   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
34   PetscInt     nt;
35 
36   PetscFunctionBegin;
37   PetscCall(VecGetLocalSize(xx, &nt));
38   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
39   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
40   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
41   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
42   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 PetscErrorCode MatMultAdd_MPISELLCUDA(Mat A, Vec xx, Vec yy, Vec zz)
47 {
48   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
49   PetscInt     nt;
50 
51   PetscFunctionBegin;
52   PetscCall(VecGetLocalSize(xx, &nt));
53   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
54   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
55   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
56   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
57   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
58   PetscFunctionReturn(PETSC_SUCCESS);
59 }
60 
61 PetscErrorCode MatMultTranspose_MPISELLCUDA(Mat A, Vec xx, Vec yy)
62 {
63   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
64   PetscInt     nt;
65 
66   PetscFunctionBegin;
67   PetscCall(VecGetLocalSize(xx, &nt));
68   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->rmap->n, nt);
69   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
70   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
71   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
72   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
73   PetscFunctionReturn(PETSC_SUCCESS);
74 }
75 
76 PetscErrorCode MatSetFromOptions_MPISELLCUDA(PetscOptionItems *PetscOptionsObject, Mat A)
77 {
78   PetscFunctionBegin;
79   PetscOptionsHeadBegin(PetscOptionsObject, "MPISELLCUDA options");
80   if (A->factortype == MAT_FACTOR_NONE) { }
81   PetscOptionsHeadEnd();
82   PetscFunctionReturn(PETSC_SUCCESS);
83 }
84 
85 PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
86 {
87   Mat_MPISELL *mpisell;
88 
89   PetscFunctionBegin;
90   mpisell = (Mat_MPISELL *)A->data;
91   PetscCall(MatAssemblyEnd_MPISELL(A, mode));
92   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(VecSetType(mpisell->lvec, VECSEQCUDA)); }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
97 {
98   PetscFunctionBegin;
99   PetscCall(MatDestroy_MPISELL(A));
100   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", NULL));
101   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
102   PetscFunctionReturn(PETSC_SUCCESS);
103 }
104 
105 PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
106 {
107   PetscFunctionBegin;
108   PetscCall(MatCreate_MPISELL(A));
109   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
110   PetscCall(PetscFree(A->defaultvectype));
111   PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));
112 
113   A->ops->assemblyend   = MatAssemblyEnd_MPISELLCUDA;
114   A->ops->mult          = MatMult_MPISELLCUDA;
115   A->ops->multadd       = MatMultAdd_MPISELLCUDA;
116   A->ops->multtranspose = MatMultTranspose_MPISELLCUDA;
117   A->ops->destroy       = MatDestroy_MPISELLCUDA;
118 
119   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
120   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
121   PetscFunctionReturn(PETSC_SUCCESS);
122 }
123 
124 /*@
125    MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
126    This matrix will ultimately pushed down to NVIDIA GPUs.
127 
128    Collective
129 
130    Input Parameters:
131 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
132 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
133            This value should be the same as the local size used in creating the
134            y vector for the matrix-vector product y = Ax.
135 .  n - This value should be the same as the local size used in creating the
136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
137        calculated if `N` is given) For square matrices `n` is almost always `m`.
138 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
139 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
140 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
141            (same value is used for all local rows)
142 .  d_nnz - array containing the number of nonzeros in the various rows of the
143            DIAGONAL portion of the local submatrix (possibly different for each row)
144            or `NULL`, if `d_nz` is used to specify the nonzero structure.
145            The size of this array is equal to the number of local rows, i.e `m`.
146            For matrices you plan to factor you must leave room for the diagonal entry and
147            put in the entry even if it is zero.
148 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
149            submatrix (same value is used for all local rows).
150 -  o_nnz - array containing the number of nonzeros in the various rows of the
151            OFF-DIAGONAL portion of the local submatrix (possibly different for
152            each row) or `NULL`, if `o_nz` is used to specify the nonzero
153            structure. The size of this array is equal to the number
154            of local rows, i.e `m`.
155 
156    Output Parameter:
157 .  A - the matrix
158 
159    Level: intermediate
160 
161    Notes:
162    If `nnz` is given then `nz` is ignored
163 
164    Specify the preallocated storage with either `nz` or `nnz` (not both).
165    Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
166    allocation.
167 
168 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
169 @*/
170 PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
171 {
172   PetscMPIInt size;
173 
174   PetscFunctionBegin;
175   PetscCall(MatCreate(comm, A));
176   PetscCall(MatSetSizes(*A, m, n, M, N));
177   PetscCallMPI(MPI_Comm_size(comm, &size));
178   if (size > 1) {
179     PetscCall(MatSetType(*A, MATMPISELLCUDA));
180     PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
181   } else {
182     PetscCall(MatSetType(*A, MATSEQSELLCUDA));
183     PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
184   }
185   PetscFunctionReturn(PETSC_SUCCESS);
186 }
187 
188 /*MC
189    MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.
190 
191    Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.
192 
193    This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
194    and `MATMPISELLCUDA` otherwise.  As a result, for single process communicators,
195    `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
196    for communicators controlling multiple processes.  It is recommended that you call both of
197    the above preallocation routines for simplicity.
198 
199    Options Database Key:
200 .  -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to MatSetFromOptions()
201 
202   Level: beginner
203 
204  .seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
205 M*/
206