#include "../matmpidensecupm.hpp"

using namespace Petsc::mat::cupm;
using Petsc::device::cupm::DeviceType;

static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

/*MC
  MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

  This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
  communicator, and `MATMPIDENSECUDA` otherwise.

  Options Database Key:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
          `MATMPIDENSEHIP`, `MATDENSE`
M*/

/*MC
  MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
  GPUs.

  Options Database Key:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
          `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
M*/
PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Create(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
- data - optional location of GPU matrix data. Pass `NULL` to have PETSc control matrix
         memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
  responsible for freeing this array; it will not be freed when the matrix is destroyed. The
  array must have been allocated with `cudaMalloc()`.
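  Example Usage:
  A minimal sketch of placing a user-allocated device buffer (here `A` is an already created
  `MATDENSECUDA` matrix, and `lda` and `N` stand for the local leading dimension and number of
  columns of the local array, assumed to be known):
.vb
  PetscScalar *d_data;

  // the buffer must come from cudaMalloc() and must respect the leading dimension of A
  PetscCallCUDA(cudaMalloc((void **)&d_data, lda * N * sizeof(*d_data)));
  PetscCall(MatDenseCUDAPlaceArray(A, d_data));
  // ... use A ...
  PetscCall(MatDenseCUDAResetArray(A));
  PetscCallCUDA(cudaFree(d_data)); // freeing the buffer remains the user's responsibility
.ve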

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
          `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the
  call to `MatDenseCUDAPlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDenseCUDAPlaceArray()`

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
  with an array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  This permanently replaces the GPU array and frees the memory associated with the old GPU
  array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
  destroyed. The array should respect the matrix leading dimension.

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
  matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Level: developer

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated
  data, use `MatDenseCUDAGetArray()`.

  The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
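  Example Usage:
  A minimal sketch of overwriting the matrix entirely on the device (`lda` and `N` stand for
  the local leading dimension and number of columns of the local array, assumed to be known;
  a real code would typically launch its own kernel instead of `cudaMemset()`):
.vb
  PetscScalar *d_a;

  PetscCall(MatDenseCUDAGetArrayWrite(A, &d_a));
  // overwrite every entry on the device, here by zeroing the buffer
  PetscCallCUDA(cudaMemset(d_a, 0, lda * N * sizeof(*d_a)));
  PetscCall(MatDenseCUDARestoreArrayWrite(A, &d_a));
.ve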

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A     - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
  no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Level: developer

  Note:
  Data may be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A     - the matrix
- array - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
  array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. array - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
  `MatDenseCUDAGetArrayRead()`.
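  Example Usage:
  A minimal sketch of read-write device access (the kernel launch stands in for any user CUDA
  code that updates the buffer in place; `ScaleEntries`, `blocks`, and `threads` are
  hypothetical):
.vb
  PetscScalar *d_a;

  PetscCall(MatDenseCUDAGetArray(A, &d_a));
  // d_a holds up-to-date values; modify them in place on the device
  ScaleEntries<<<blocks, threads>>>(d_a);
  PetscCall(MatDenseCUDARestoreArray(A, &d_a));
.ve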

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
  previously obtained with `MatDenseCUDAGetArray()`.

  Not Collective

  Input Parameters:
+ A     - the matrix
- array - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
          `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}