xref: /petsc/src/mat/impls/sell/mpi/mpicuda/mpisellcuda.cu (revision 2d1451d43b73a0495cd81c074cbc1e0206888947)
1*2d1451d4SHong Zhang #include <petscconf.h>
2*2d1451d4SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/
3*2d1451d4SHong Zhang 
4*2d1451d4SHong Zhang PetscErrorCode MatMPISELLSetPreallocation_MPISELLCUDA(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
5*2d1451d4SHong Zhang {
6*2d1451d4SHong Zhang   Mat_MPISELL *b = (Mat_MPISELL *)B->data;
7*2d1451d4SHong Zhang 
8*2d1451d4SHong Zhang   PetscFunctionBegin;
9*2d1451d4SHong Zhang   PetscCall(PetscLayoutSetUp(B->rmap));
10*2d1451d4SHong Zhang   PetscCall(PetscLayoutSetUp(B->cmap));
11*2d1451d4SHong Zhang 
12*2d1451d4SHong Zhang   if (!B->preallocated) {
13*2d1451d4SHong Zhang     /* Explicitly create 2 MATSEQSELLCUDA matrices. */
14*2d1451d4SHong Zhang     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
15*2d1451d4SHong Zhang     PetscCall(MatBindToCPU(b->A, B->boundtocpu));
16*2d1451d4SHong Zhang     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
17*2d1451d4SHong Zhang     PetscCall(MatSetType(b->A, MATSEQSELLCUDA));
18*2d1451d4SHong Zhang     PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19*2d1451d4SHong Zhang     PetscCall(MatBindToCPU(b->B, B->boundtocpu));
20*2d1451d4SHong Zhang     PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
21*2d1451d4SHong Zhang     PetscCall(MatSetType(b->B, MATSEQSELLCUDA));
22*2d1451d4SHong Zhang   }
23*2d1451d4SHong Zhang   PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
24*2d1451d4SHong Zhang   PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
25*2d1451d4SHong Zhang   B->preallocated  = PETSC_TRUE;
26*2d1451d4SHong Zhang   B->was_assembled = PETSC_FALSE;
27*2d1451d4SHong Zhang   B->assembled     = PETSC_FALSE;
28*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
29*2d1451d4SHong Zhang }
30*2d1451d4SHong Zhang 
31*2d1451d4SHong Zhang PetscErrorCode MatMult_MPISELLCUDA(Mat A, Vec xx, Vec yy)
32*2d1451d4SHong Zhang {
33*2d1451d4SHong Zhang   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
34*2d1451d4SHong Zhang   PetscInt     nt;
35*2d1451d4SHong Zhang 
36*2d1451d4SHong Zhang   PetscFunctionBegin;
37*2d1451d4SHong Zhang   PetscCall(VecGetLocalSize(xx, &nt));
38*2d1451d4SHong Zhang   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
39*2d1451d4SHong Zhang   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
40*2d1451d4SHong Zhang   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
41*2d1451d4SHong Zhang   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
42*2d1451d4SHong Zhang   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
43*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
44*2d1451d4SHong Zhang }
45*2d1451d4SHong Zhang 
46*2d1451d4SHong Zhang PetscErrorCode MatMultAdd_MPISELLCUDA(Mat A, Vec xx, Vec yy, Vec zz)
47*2d1451d4SHong Zhang {
48*2d1451d4SHong Zhang   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
49*2d1451d4SHong Zhang   PetscInt     nt;
50*2d1451d4SHong Zhang 
51*2d1451d4SHong Zhang   PetscFunctionBegin;
52*2d1451d4SHong Zhang   PetscCall(VecGetLocalSize(xx, &nt));
53*2d1451d4SHong Zhang   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
54*2d1451d4SHong Zhang   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
55*2d1451d4SHong Zhang   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
56*2d1451d4SHong Zhang   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
57*2d1451d4SHong Zhang   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
58*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
59*2d1451d4SHong Zhang }
60*2d1451d4SHong Zhang 
61*2d1451d4SHong Zhang PetscErrorCode MatMultTranspose_MPISELLCUDA(Mat A, Vec xx, Vec yy)
62*2d1451d4SHong Zhang {
63*2d1451d4SHong Zhang   Mat_MPISELL *a = (Mat_MPISELL *)A->data;
64*2d1451d4SHong Zhang   PetscInt     nt;
65*2d1451d4SHong Zhang 
66*2d1451d4SHong Zhang   PetscFunctionBegin;
67*2d1451d4SHong Zhang   PetscCall(VecGetLocalSize(xx, &nt));
68*2d1451d4SHong Zhang   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->rmap->n, nt);
69*2d1451d4SHong Zhang   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
70*2d1451d4SHong Zhang   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
71*2d1451d4SHong Zhang   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
72*2d1451d4SHong Zhang   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
73*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
74*2d1451d4SHong Zhang }
75*2d1451d4SHong Zhang 
76*2d1451d4SHong Zhang PetscErrorCode MatSetFromOptions_MPISELLCUDA(PetscOptionItems *PetscOptionsObject, Mat A)
77*2d1451d4SHong Zhang {
78*2d1451d4SHong Zhang   PetscFunctionBegin;
79*2d1451d4SHong Zhang   PetscOptionsHeadBegin(PetscOptionsObject, "MPISELLCUDA options");
80*2d1451d4SHong Zhang   if (A->factortype == MAT_FACTOR_NONE) { }
81*2d1451d4SHong Zhang   PetscOptionsHeadEnd();
82*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
83*2d1451d4SHong Zhang }
84*2d1451d4SHong Zhang 
85*2d1451d4SHong Zhang PetscErrorCode MatAssemblyEnd_MPISELLCUDA(Mat A, MatAssemblyType mode)
86*2d1451d4SHong Zhang {
87*2d1451d4SHong Zhang   Mat_MPISELL *mpisell;
88*2d1451d4SHong Zhang 
89*2d1451d4SHong Zhang   PetscFunctionBegin;
90*2d1451d4SHong Zhang   mpisell = (Mat_MPISELL *)A->data;
91*2d1451d4SHong Zhang   PetscCall(MatAssemblyEnd_MPISELL(A, mode));
92*2d1451d4SHong Zhang   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(VecSetType(mpisell->lvec, VECSEQCUDA)); }
93*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
94*2d1451d4SHong Zhang }
95*2d1451d4SHong Zhang 
96*2d1451d4SHong Zhang PetscErrorCode MatDestroy_MPISELLCUDA(Mat A)
97*2d1451d4SHong Zhang {
98*2d1451d4SHong Zhang   PetscFunctionBegin;
99*2d1451d4SHong Zhang   PetscCall(MatDestroy_MPISELL(A));
100*2d1451d4SHong Zhang   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", NULL));
101*2d1451d4SHong Zhang   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
102*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
103*2d1451d4SHong Zhang }
104*2d1451d4SHong Zhang 
105*2d1451d4SHong Zhang PETSC_EXTERN PetscErrorCode MatCreate_MPISELLCUDA(Mat A)
106*2d1451d4SHong Zhang {
107*2d1451d4SHong Zhang   PetscFunctionBegin;
108*2d1451d4SHong Zhang   PetscCall(MatCreate_MPISELL(A));
109*2d1451d4SHong Zhang   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLCUDA));
110*2d1451d4SHong Zhang   PetscCall(PetscFree(A->defaultvectype));
111*2d1451d4SHong Zhang   PetscCall(PetscStrallocpy(VECCUDA, &A->defaultvectype));
112*2d1451d4SHong Zhang 
113*2d1451d4SHong Zhang   A->ops->assemblyend   = MatAssemblyEnd_MPISELLCUDA;
114*2d1451d4SHong Zhang   A->ops->mult          = MatMult_MPISELLCUDA;
115*2d1451d4SHong Zhang   A->ops->multadd       = MatMultAdd_MPISELLCUDA;
116*2d1451d4SHong Zhang   A->ops->multtranspose = MatMultTranspose_MPISELLCUDA;
117*2d1451d4SHong Zhang   A->ops->destroy       = MatDestroy_MPISELLCUDA;
118*2d1451d4SHong Zhang 
119*2d1451d4SHong Zhang   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLCUDA));
120*2d1451d4SHong Zhang   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpisellcuda_mpiaij_C", MatConvert_MPISELL_MPIAIJ));
121*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
122*2d1451d4SHong Zhang }
123*2d1451d4SHong Zhang 
/*@
   MatCreateSELLCUDA - Creates a sparse matrix in SELL format.
   This matrix will ultimately be pushed down to NVIDIA GPUs.

   Collective

   Input Parameters:
+  comm - MPI communicator; a `MATSEQSELLCUDA` matrix is created when it contains a single
          process, and a `MATMPISELLCUDA` matrix otherwise
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or `NULL`, if `d_nz` is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., `m`.
           For matrices you plan to factor you must leave room for the diagonal entry and
           put in the entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or `NULL`, if `o_nz` is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., `m`.

   Output Parameter:
.  A - the matrix

   Level: intermediate

   Notes:
   If `nnz` is given then `nz` is ignored.

   Specify the preallocated storage with either `nz` or `nnz` (not both).
   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
   allocation.

.seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLCUDA`, `MATSELLCUDA`
@*/
170*2d1451d4SHong Zhang PetscErrorCode MatCreateSELLCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
171*2d1451d4SHong Zhang {
172*2d1451d4SHong Zhang   PetscMPIInt size;
173*2d1451d4SHong Zhang 
174*2d1451d4SHong Zhang   PetscFunctionBegin;
175*2d1451d4SHong Zhang   PetscCall(MatCreate(comm, A));
176*2d1451d4SHong Zhang   PetscCall(MatSetSizes(*A, m, n, M, N));
177*2d1451d4SHong Zhang   PetscCallMPI(MPI_Comm_size(comm, &size));
178*2d1451d4SHong Zhang   if (size > 1) {
179*2d1451d4SHong Zhang     PetscCall(MatSetType(*A, MATMPISELLCUDA));
180*2d1451d4SHong Zhang     PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
181*2d1451d4SHong Zhang   } else {
182*2d1451d4SHong Zhang     PetscCall(MatSetType(*A, MATSEQSELLCUDA));
183*2d1451d4SHong Zhang     PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
184*2d1451d4SHong Zhang   }
185*2d1451d4SHong Zhang   PetscFunctionReturn(PETSC_SUCCESS);
186*2d1451d4SHong Zhang }
187*2d1451d4SHong Zhang 
/*MC
   MATSELLCUDA - "sellcuda" = "mpisellcuda" - A matrix type to be used for sparse matrices.

   Sliced ELLPACK matrix type whose data resides on NVIDIA GPUs.

   This matrix type is identical to `MATSEQSELLCUDA` when constructed with a single process communicator,
   and `MATMPISELLCUDA` otherwise.  As a result, for single process communicators,
   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type mpisellcuda - sets the matrix type to `MATMPISELLCUDA` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateSELLCUDA()`, `MATSEQSELLCUDA`, `MatCreateSeqSELLCUDA()`, `MatCUDAFormatOperation()`
M*/
206