1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) { 27 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29 30 PetscFunctionBegin; 31 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 33 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34 35 try { 36 if (a->compressedrow.use) { 37 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38 39 // Since PetscInt is different from cl_uint, we have to convert: 40 viennacl::backend::mem_handle dummy; 41 42 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47 row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51 col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53 54 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 55 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56 } else { 57 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58 59 // Since PetscInt is in general different from cl_uint, we have to convert: 60 viennacl::backend::mem_handle dummy; 61 62 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63 row_buffer.raw_resize(dummy, A->rmap->n + 1); 64 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65 66 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67 col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69 70 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 71 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72 } 73 ViennaCLWaitForGPU(); 74 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 75 76 // Create temporary vector for v += A*x: 77 if (viennaclstruct->tempvec) { 78 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 79 delete (ViennaCLVector *)viennaclstruct->tempvec; 80 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 81 } else { 82 viennaclstruct->tempvec->clear(); 83 } 84 } else { 85 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86 } 87 88 A->offloadmask = PETSC_OFFLOAD_BOTH; 89 90 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 91 } 92 } 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) { 97 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 98 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 99 PetscInt m = A->rmap->n; 100 101 PetscFunctionBegin; 102 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 103 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 104 try { 105 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 106 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 107 a->nz = Agpu->nnz(); 108 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 109 A->preallocated = PETSC_TRUE; 110 if (a->singlemalloc) { 111 if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 112 } else { 113 if (a->i) PetscCall(PetscFree(a->i)); 114 if (a->j) PetscCall(PetscFree(a->j)); 115 if (a->a) PetscCall(PetscFree(a->a)); 116 } 117 PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 118 119 a->singlemalloc = PETSC_TRUE; 120 121 /* Setup row lengths */ 122 PetscCall(PetscFree(a->imax)); 123 PetscCall(PetscFree(a->ilen)); 124 PetscCall(PetscMalloc1(m, &a->imax)); 125 PetscCall(PetscMalloc1(m, &a->ilen)); 126 127 /* Copy data back from GPU */ 128 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 129 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 130 131 // copy row array 132 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 133 (a->i)[0] = row_buffer[0]; 134 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 135 (a->i)[i + 1] = row_buffer[i + 1]; 136 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 137 } 138 139 // copy column indices 140 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 141 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 142 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 143 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 144 145 // copy nonzero entries directly to destination (no conversion required) 146 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 147 148 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 149 ViennaCLWaitForGPU(); 150 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 151 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 152 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 153 PetscFunctionReturn(0); 154 } else { 155 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 156 157 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 158 if (!Agpu) { 159 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 160 } else { 161 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 162 } 163 } 164 A->offloadmask = PETSC_OFFLOAD_BOTH; 165 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 166 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 167 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) { 172 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 173 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 174 const ViennaCLVector *xgpu = NULL; 175 ViennaCLVector *ygpu = NULL; 176 177 PetscFunctionBegin; 178 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 179 PetscCall(MatViennaCLCopyToGPU(A)); 180 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 181 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 182 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 183 PetscCall(PetscLogGpuTimeBegin()); 184 try { 185 if (a->compressedrow.use) { 186 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 187 } else { 188 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 189 } 190 ViennaCLWaitForGPU(); 191 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 192 PetscCall(PetscLogGpuTimeEnd()); 193 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 194 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 195 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 196 } else { 197 PetscCall(VecSet_SeqViennaCL(yy, 0)); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) { 203 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 204 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 205 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 206 ViennaCLVector *zgpu = NULL; 207 208 PetscFunctionBegin; 209 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 210 PetscCall(MatViennaCLCopyToGPU(A)); 211 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 212 try { 213 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 214 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 215 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 216 PetscCall(PetscLogGpuTimeBegin()); 217 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 218 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 219 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 220 else *zgpu += *viennaclstruct->tempvec; 221 ViennaCLWaitForGPU(); 222 PetscCall(PetscLogGpuTimeEnd()); 223 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 224 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 225 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 226 227 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 228 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 229 } else { 230 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 231 } 232 PetscFunctionReturn(0); 233 } 234 235 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) { 236 PetscFunctionBegin; 237 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 238 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 239 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 240 PetscFunctionReturn(0); 241 } 242 243 /* --------------------------------------------------------------------------------*/ 244 /*@C 245 MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 246 (the default parallel PETSc format). This matrix will ultimately be pushed down 247 to GPUs and use the ViennaCL library for calculations. For good matrix 248 assembly performance the user should preallocate the matrix storage by setting 249 the parameter nz (or the array nnz). By setting these parameters accurately, 250 performance during matrix assembly can be increased substantially. 251 252 Collective 253 254 Input Parameters: 255 + comm - MPI communicator, set to `PETSC_COMM_SELF` 256 . m - number of rows 257 . n - number of columns 258 . nz - number of nonzeros per row (same for all rows) 259 - nnz - array containing the number of nonzeros in the various rows 260 (possibly different for each row) or NULL 261 262 Output Parameter: 263 . A - the matrix 264 265 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 266 MatXXXXSetPreallocation() paradigm instead of this routine directly. 267 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 268 269 Notes: 270 If nnz is given then nz is ignored 271 272 The AIJ format, also called 273 compressed row storage, is fully compatible with standard Fortran 77 274 storage. That is, the stored row and column indices can begin at 275 either one (as in Fortran) or zero. See the users' manual for details. 276 277 Specify the preallocated storage with either nz or nnz (not both). 278 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 279 allocation. For large problems you MUST preallocate memory or you 280 will get TERRIBLE performance, see the users' manual chapter on matrices. 281 282 Level: intermediate 283 284 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 285 @*/ 286 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 287 PetscFunctionBegin; 288 PetscCall(MatCreate(comm, A)); 289 PetscCall(MatSetSizes(*A, m, n, m, n)); 290 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 291 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 292 PetscFunctionReturn(0); 293 } 294 295 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) { 296 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 297 298 PetscFunctionBegin; 299 try { 300 if (viennaclcontainer) { 301 delete viennaclcontainer->tempvec; 302 delete viennaclcontainer->mat; 303 delete viennaclcontainer->compressed_mat; 304 delete viennaclcontainer; 305 } 306 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 307 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 308 309 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 310 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 311 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 312 313 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 314 A->spptr = 0; 315 PetscCall(MatDestroy_SeqAIJ(A)); 316 PetscFunctionReturn(0); 317 } 318 319 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) { 320 PetscFunctionBegin; 321 PetscCall(MatCreate_SeqAIJ(B)); 322 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 323 PetscFunctionReturn(0); 324 } 325 326 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 327 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) { 328 Mat C; 329 330 PetscFunctionBegin; 331 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 332 C = *B; 333 334 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 335 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 336 337 C->spptr = new Mat_SeqAIJViennaCL(); 338 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 339 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 340 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 341 342 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 343 344 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 345 346 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 347 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 348 PetscFunctionReturn(0); 349 } 350 351 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 352 PetscFunctionBegin; 353 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 354 *array = ((Mat_SeqAIJ *)A->data)->a; 355 PetscFunctionReturn(0); 356 } 357 358 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 359 PetscFunctionBegin; 360 A->offloadmask = PETSC_OFFLOAD_CPU; 361 *array = NULL; 362 PetscFunctionReturn(0); 363 } 364 365 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 366 PetscFunctionBegin; 367 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 368 *array = ((Mat_SeqAIJ *)A->data)->a; 369 PetscFunctionReturn(0); 370 } 371 372 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 373 PetscFunctionBegin; 374 *array = NULL; 375 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 376 PetscFunctionReturn(0); 377 } 378 379 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 380 PetscFunctionBegin; 381 *array = ((Mat_SeqAIJ *)A->data)->a; 382 PetscFunctionReturn(0); 383 } 384 385 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 386 PetscFunctionBegin; 387 A->offloadmask = PETSC_OFFLOAD_CPU; 388 *array = NULL; 389 PetscFunctionReturn(0); 390 } 391 392 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) { 393 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 394 395 PetscFunctionBegin; 396 A->boundtocpu = flg; 397 if (flg && a->inode.size) { 398 a->inode.use = PETSC_TRUE; 399 } else { 400 a->inode.use = PETSC_FALSE; 401 } 402 if (flg) { 403 /* make sure we have an up-to-date copy on the CPU */ 404 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 405 A->ops->mult = MatMult_SeqAIJ; 406 A->ops->multadd = MatMultAdd_SeqAIJ; 407 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 408 A->ops->duplicate = MatDuplicate_SeqAIJ; 409 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 410 } else { 411 A->ops->mult = MatMult_SeqAIJViennaCL; 412 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 413 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 414 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 415 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 416 417 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 418 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 419 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 420 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 421 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 422 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 423 } 424 PetscFunctionReturn(0); 425 } 426 427 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 428 Mat B; 429 430 PetscFunctionBegin; 431 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 432 433 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 434 435 B = *newmat; 436 437 B->spptr = new Mat_SeqAIJViennaCL(); 438 439 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 440 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 441 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 442 443 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 444 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 445 446 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 447 PetscCall(PetscFree(B->defaultvectype)); 448 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 449 450 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 451 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 452 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 453 454 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 455 456 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 457 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 458 PetscFunctionReturn(0); 459 } 460 461 /*MC 462 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 463 464 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 465 default. All matrix calculations are performed using the ViennaCL library. 466 467 Options Database Keys: 468 + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 469 . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 470 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 471 472 Level: beginner 473 474 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 475 M*/ 476 477 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) { 478 PetscFunctionBegin; 479 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 480 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 481 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 482 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 483 PetscFunctionReturn(0); 484 } 485