#include <petscconf.h>
#include <petscdevice.h>
#include <../src/mat/impls/sell/mpi/mpisell.h> /*I "petscmat.h" I*/

static PetscErrorCode MatMPISELLSetPreallocation_MPISELLHIP(Mat B, PetscInt d_rlenmax, const PetscInt d_rlen[], PetscInt o_rlenmax, const PetscInt o_rlen[])
{
  Mat_MPISELL *b = (Mat_MPISELL *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQSELLHIP matrices. */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatBindToCPU(b->A, B->boundtocpu));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQSELLHIP));
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
    PetscCall(MatBindToCPU(b->B, B->boundtocpu));
    PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
    PetscCall(MatSetType(b->B, MATSEQSELLHIP));
  }
  PetscCall(MatSeqSELLSetPreallocation(b->A, d_rlenmax, d_rlen));
  PetscCall(MatSeqSELLSetPreallocation(b->B, o_rlenmax, o_rlen));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetFromOptions_MPISELLHIP(Mat, PetscOptionItems)
{
  return PETSC_SUCCESS;
}

static PetscErrorCode MatAssemblyEnd_MPISELLHIP(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_MPISELL(A, mode));
  if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(VecSetType(((Mat_MPISELL *)A->data)->lvec, VECSEQHIP));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPISELLHIP(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatDestroy_MPISELL(A));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISELL_MPISELLHIP(Mat B, MatType, MatReuse reuse, Mat *newmat)
{
  Mat_MPISELL *a;
  Mat          A;

  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(B, MAT_COPY_VALUES, newmat));
  else if (reuse == MAT_REUSE_MATRIX) PetscCall(MatCopy(B, *newmat, SAME_NONZERO_PATTERN));
  A             = *newmat;
  A->boundtocpu = PETSC_FALSE;
  PetscCall(PetscFree(A->defaultvectype));
  PetscCall(PetscStrallocpy(VECHIP, &A->defaultvectype));

  a = (Mat_MPISELL *)A->data;
  if (a->A) PetscCall(MatSetType(a->A, MATSEQSELLHIP));
  if (a->B) PetscCall(MatSetType(a->B, MATSEQSELLHIP));
  if (a->lvec) PetscCall(VecSetType(a->lvec, VECSEQHIP));

  A->ops->assemblyend    = MatAssemblyEnd_MPISELLHIP;
  A->ops->setfromoptions = MatSetFromOptions_MPISELLHIP;
  A->ops->destroy        = MatDestroy_MPISELLHIP;

  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPISELLHIP));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", MatMPISELLSetPreallocation_MPISELLHIP));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPISELLHIP(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_HIP));
  PetscCall(MatCreate_MPISELL(A));
  PetscCall(MatConvert_MPISELL_MPISELLHIP(A, MATMPISELLHIP, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateSELLHIP - Creates a sparse matrix in SELL format. The matrix data is
  stored on AMD GPUs.

  Collective

  Input Parameters:
+ comm  - MPI communicator
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product $ y = Ax $.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product $ y = Ax $. For square matrices `n` is almost always `m`.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or `NULL`, if `d_nz` is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. `m`.
           For matrices you plan to factor you must leave room for the diagonal entry and
           put in the entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows)
- o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or `NULL`, if `o_nz` is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. `m`.

  Output Parameter:
. A - the matrix

  Level: intermediate

  Notes:
  If `d_nnz` (respectively `o_nnz`) is given then `d_nz` (respectively `o_nz`) is ignored.

  Specify the preallocated storage with either `d_nz`/`o_nz` or `d_nnz`/`o_nnz` (not both).
  Set `d_nz` = `o_nz` = `PETSC_DEFAULT` and `d_nnz` = `o_nnz` = `NULL` for PETSc to control
  dynamic memory allocation.
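
  Example Usage:
  A minimal sketch of the call sequence; the local sizes and per-row preallocation counts
  below are illustrative assumptions, not values taken from this routine.
.vb
  Mat      A;
  PetscInt rstart;
  PetscInt d_nnz[] = {2, 1}; /* nonzeros per local row in the diagonal block (assumed) */
  PetscInt o_nnz[] = {1, 0}; /* nonzeros per local row in the off-diagonal block (assumed) */

  PetscCall(MatCreateSELLHIP(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  PetscCall(MatSetValue(A, rstart, rstart, 1.0, INSERT_VALUES)); /* set entries as with any Mat */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDestroy(&A));
.ve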

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSELL()`, `MatSetValues()`, `MATMPISELLHIP`, `MATSELLHIP`
@*/
PetscErrorCode MatCreateSELLHIP(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISELLHIP));
    PetscCall(MatMPISELLSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSELLHIP));
    PetscCall(MatSeqSELLSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSELLHIP - "sellhip" = "mpisellhip" - A matrix type to be used for sparse matrices on AMD GPUs

   Sliced ELLPACK matrix type whose data resides on GPUs.

   This matrix type is identical to `MATSEQSELLHIP` when constructed with a single process communicator,
   and `MATMPISELLHIP` otherwise.  As a result, for single process communicators,
   `MatSeqSELLSetPreallocation()` is supported, and similarly `MatMPISELLSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

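   Example Usage:
   A minimal sketch of creating a matrix of this type and preallocating it as recommended
   above; the global size of 100 and the per-row length of 5 are illustrative assumptions.
.vb
   Mat A;

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATSELLHIP));
   /* Call both preallocation routines; the one that does not match the actual (Seq or MPI) type is a no-op */
   PetscCall(MatSeqSELLSetPreallocation(A, 5, NULL));
   PetscCall(MatMPISELLSetPreallocation(A, 5, NULL, 5, NULL));
.ve
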
   Options Database Key:
.  -mat_type sellhip - sets the matrix type to `MATSELLHIP` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `MatCreateSELLHIP()`, `MATSEQSELLHIP`, `MatCreateSeqSELLHIP()`, `MatHIPFormatOperation()`
M*/