xref: /petsc/src/mat/impls/dense/mpi/cupm/cuda/matmpidensecuda.cu (revision 8d47944aebcd8a3a8a4a1aebd4b3f440b4d70fab)
1 #include "../matmpidensecupm.hpp"
2 
3 using namespace Petsc::mat::cupm;
4 using Petsc::device::cupm::DeviceType;
5 
6 static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};
7 
8 /*MC
9   MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.
10 
11   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
12   communicator, and `MATMPIDENSECUDA` otherwise.
13 
14   Options Database Key:
15 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
16                         `MatSetFromOptions()`
17 
18   Level: beginner
19 
20 .seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
21 `MATMPIDENSEHIP`, `MATDENSE`
22 M*/
23 
24 /*MC
25   MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
26   GPUs.
27 
28   Options Database Key:
29 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
30                            `MatSetFromOptions()`
31 
32   Level: beginner
33 
34 .seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
35 `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
36 M*/
37 PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
38 {
39   PetscFunctionBegin;
40   PetscCall(mat_cupm.Create(A));
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
44 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
45 {
46   PetscFunctionBegin;
47   PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 /*@C
52   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
53 
54   Collective
55 
56   Input Parameters:
57 + comm - MPI communicator
58 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
59 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
60 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
61 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
62 - data - optional location of GPU matrix data. Pass`NULL` to have PETSc to control matrix
63          memory allocation.
64 
65   Output Parameter:
66 . A - the matrix
67 
68   Level: intermediate
69 
70 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
71 @*/
72 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
73 {
74   PetscFunctionBegin;
75   PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
76   PetscFunctionReturn(PETSC_SUCCESS);
77 }
78 
79 /*@C
80   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
81   array provided by the user. This is useful to avoid copying an array into a matrix.
82 
83   Not Collective
84 
85   Input Parameters:
86 + mat   - the matrix
87 - array - the array in column major order
88 
89   Level: developer
90 
91   Note:
92   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
93   responsible for freeing this array; it will not be freed when the matrix is destroyed. The
94   array must have been allocated with `cudaMalloc()`.
95 
96 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
97 `MatDenseCUDAReplaceArray()`
98 @*/
99 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
100 {
101   PetscFunctionBegin;
102   PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
103   PetscFunctionReturn(PETSC_SUCCESS);
104 }
105 
106 /*@C
107   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
108   `MatDenseCUDAPlaceArray()`
109 
110   Not Collective
111 
112   Input Parameters:
113 . mat - the matrix
114 
115   Level: developer
116 
117   Note:
118   You can only call this after a call to `MatDenseCUDAPlaceArray()`
119 
120 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
121 @*/
122 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
123 {
124   PetscFunctionBegin;
125   PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
126   PetscFunctionReturn(PETSC_SUCCESS);
127 }
128 
129 /*@C
130   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
131   with an array provided by the user. This is useful to avoid copying an array into a matrix.
132 
133   Not Collective
134 
135   Input Parameters:
136 + mat   - the matrix
137 - array - the array in column major order
138 
139   Level: developer
140 
141   Note:
142   This permanently replaces the GPU array and frees the memory associated with the old GPU
143   array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
144   destroyed. The array should respect the matrix leading dimension.
145 
146 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
147 @*/
148 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
149 {
150   PetscFunctionBegin;
151   PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
152   PetscFunctionReturn(PETSC_SUCCESS);
153 }
154 
155 /*@C
156   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
157   matrix.
158 
159   Not Collective
160 
161   Input Parameters:
162 . A - the matrix
163 
164   Output Parameters
165 . array - the GPU array in column major order
166 
167   Level: developer
168 
169   Notes:
170   The data on the GPU may not be updated due to operations done on the CPU. If you need updated
171   data, use `MatDenseCUDAGetArray()`.
172 
173   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
174 
175 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
176 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
177 `MatDenseCUDARestoreArrayRead()`
178 @*/
179 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
180 {
181   PetscFunctionBegin;
182   PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
183   PetscFunctionReturn(PETSC_SUCCESS);
184 }
185 
186 /*@C
187   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
188   `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
189 
190   Not Collective
191 
192   Input Parameters:
193 + A     - the matrix
194 - array - the GPU array in column major order
195 
196   Level: developer
197 
198 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
199 `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
200 @*/
201 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
202 {
203   PetscFunctionBegin;
204   PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
208 /*@C
209   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
210   `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
211   no longer needed.
212 
213   Not Collective
214 
215   Input Parameters:
216 . A - the matrix
217 
218   Output Parameters
219 . array - the GPU array in column major order
220 
221   Level: developer
222 
223   Note:
224   Data may be copied to the GPU due to operations done on the CPU. If you need write only
225   access, use `MatDenseCUDAGetArrayWrite()`.
226 
227 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
228 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
229 `MatDenseCUDARestoreArrayRead()`
230 @*/
231 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
232 {
233   PetscFunctionBegin;
234   PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
235   PetscFunctionReturn(PETSC_SUCCESS);
236 }
237 
238 /*@C
239   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
240   `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
241 
242   Not Collective
243 
244   Input Parameters:
245 + A     - the matrix
246 - array - the GPU array in column major order
247 
248   Level: developer
249 
250   Note:
251   Data can be copied to the GPU due to operations done on the CPU. If you need write only
252   access, use `MatDenseCUDAGetArrayWrite()`.
253 
254 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
255 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
256 @*/
257 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
258 {
259   PetscFunctionBegin;
260   PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*@C
265   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
266   array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
267 
268   Not Collective
269 
270   Input Parameters:
271 . A - the matrix
272 
273   Output Parameters
274 . array - the GPU array in column major order
275 
276   Level: developer
277 
278   Note:
279   Data can be copied to the GPU due to operations done on the CPU. If you need write only
280   access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
281   `MatDenseCUDAGetArrayRead()`.
282 
283 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
284 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
285 `MatDenseCUDARestoreArrayRead()`
286 @*/
287 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
288 {
289   PetscFunctionBegin;
290   PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
291   PetscFunctionReturn(PETSC_SUCCESS);
292 }
293 
294 /*@C
295   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
296   previously obtained with `MatDenseCUDAGetArray()`.
297 
298   Not Collective
299 
300   Level: developer
301 
302   Input Parameters:
303 + A     - the matrix
304 - array - the GPU array in column major order
305 
306 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
307 `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
308 @*/
309 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
310 {
311   PetscFunctionBegin;
312   PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315