1 /* 2 Defines the basic matrix operations for the AIJ (compressed row) 3 matrix storage format. 4 */ 5 6 #include <petscconf.h> 7 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 9 #include <petscbt.h> 10 #include <../src/vec/vec/impls/dvecimpl.h> 11 #include <petsc/private/vecimpl.h> 12 13 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 14 15 #include <algorithm> 16 #include <vector> 17 #include <string> 18 19 #include "viennacl/linalg/prod.hpp" 20 21 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 22 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 23 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 24 25 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26 { 27 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29 30 PetscFunctionBegin; 31 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 33 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34 35 try { 36 if (a->compressedrow.use) { 37 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38 39 // Since PetscInt is different from cl_uint, we have to convert: 40 viennacl::backend::mem_handle dummy; 41 42 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47 row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51 col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53 54 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 55 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56 } else { 57 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58 59 // Since PetscInt is in general different from cl_uint, we have to convert: 60 viennacl::backend::mem_handle dummy; 61 62 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63 row_buffer.raw_resize(dummy, A->rmap->n + 1); 64 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65 66 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67 col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69 70 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 71 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72 } 73 ViennaCLWaitForGPU(); 74 } catch (std::exception const &ex) { 75 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 76 } 77 78 // Create temporary vector for v += A*x: 79 if (viennaclstruct->tempvec) { 80 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 81 delete (ViennaCLVector *)viennaclstruct->tempvec; 82 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 83 } else { 84 viennaclstruct->tempvec->clear(); 85 } 86 } else { 87 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 88 } 89 90 A->offloadmask = PETSC_OFFLOAD_BOTH; 91 92 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 93 } 94 } 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 99 { 100 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 101 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 102 PetscInt m = A->rmap->n; 103 104 PetscFunctionBegin; 105 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS); 106 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 107 try { 108 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 109 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 110 a->nz = Agpu->nnz(); 111 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 112 A->preallocated = PETSC_TRUE; 113 if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)a->a)); 114 if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->j)); 115 if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->i)); 116 PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a)); 117 PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j)); 118 PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i)); 119 a->free_a = PETSC_TRUE; 120 a->free_ij = PETSC_TRUE; 121 122 /* Setup row lengths */ 123 PetscCall(PetscFree(a->imax)); 124 PetscCall(PetscFree(a->ilen)); 125 PetscCall(PetscMalloc1(m, &a->imax)); 126 PetscCall(PetscMalloc1(m, &a->ilen)); 127 128 /* Copy data back from GPU */ 129 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 130 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 131 132 // copy row array 133 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 134 (a->i)[0] = row_buffer[0]; 135 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 136 (a->i)[i + 1] = row_buffer[i + 1]; 137 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 138 } 139 140 // copy column indices 141 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 142 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 143 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 144 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 145 146 // copy nonzero entries directly to destination (no conversion required) 147 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 148 149 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 150 ViennaCLWaitForGPU(); 151 } catch (std::exception const &ex) { 152 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 153 } 154 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 155 PetscFunctionReturn(PETSC_SUCCESS); 156 } else { 157 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS); 158 159 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 160 if (!Agpu) { 161 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 162 } else { 163 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 164 } 165 } 166 A->offloadmask = PETSC_OFFLOAD_BOTH; 167 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 168 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 169 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) 174 { 175 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 176 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 177 const ViennaCLVector *xgpu = NULL; 178 ViennaCLVector *ygpu = NULL; 179 180 PetscFunctionBegin; 181 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 182 PetscCall(MatViennaCLCopyToGPU(A)); 183 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 184 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 185 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 186 PetscCall(PetscLogGpuTimeBegin()); 187 try { 188 if (a->compressedrow.use) { 189 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 190 } else { 191 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 192 } 193 ViennaCLWaitForGPU(); 194 } catch (std::exception const &ex) { 195 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 196 } 197 PetscCall(PetscLogGpuTimeEnd()); 198 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 199 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 200 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 201 } else { 202 PetscCall(VecSet_SeqViennaCL(yy, 0)); 203 } 204 PetscFunctionReturn(PETSC_SUCCESS); 205 } 206 207 static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) 208 { 209 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 210 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 211 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 212 ViennaCLVector *zgpu = NULL; 213 214 PetscFunctionBegin; 215 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 216 PetscCall(MatViennaCLCopyToGPU(A)); 217 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 218 try { 219 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 220 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 221 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 222 PetscCall(PetscLogGpuTimeBegin()); 223 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 224 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 225 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 226 else *zgpu += *viennaclstruct->tempvec; 227 ViennaCLWaitForGPU(); 228 PetscCall(PetscLogGpuTimeEnd()); 229 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 230 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 231 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 232 233 } catch (std::exception const &ex) { 234 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 235 } 236 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 237 } else { 238 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 239 } 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) 244 { 245 PetscFunctionBegin; 246 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 247 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 248 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 249 PetscFunctionReturn(PETSC_SUCCESS); 250 } 251 252 /*@C 253 MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 254 (the default parallel PETSc format). This matrix will ultimately be pushed down 255 to GPUs and use the ViennaCL library for calculations. 256 257 Collective 258 259 Input Parameters: 260 + comm - MPI communicator, set to `PETSC_COMM_SELF` 261 . m - number of rows 262 . n - number of columns 263 . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is set 264 - nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL` 265 266 Output Parameter: 267 . A - the matrix 268 269 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 270 MatXXXXSetPreallocation() paradigm instead of this routine directly. 271 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 272 273 Notes: 274 The AIJ format, also called 275 compressed row storage, is fully compatible with standard Fortran 276 storage. That is, the stored row and column indices can begin at 277 either one (as in Fortran) or zero. 278 279 Specify the preallocated storage with either `nz` or `nnz` (not both). 280 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 281 allocation. 282 283 Level: intermediate 284 285 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()` 286 @*/ 287 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 288 { 289 PetscFunctionBegin; 290 PetscCall(MatCreate(comm, A)); 291 PetscCall(MatSetSizes(*A, m, n, m, n)); 292 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 293 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 298 { 299 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 300 301 PetscFunctionBegin; 302 try { 303 if (viennaclcontainer) { 304 delete viennaclcontainer->tempvec; 305 delete viennaclcontainer->mat; 306 delete viennaclcontainer->compressed_mat; 307 delete viennaclcontainer; 308 } 309 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 310 } catch (std::exception const &ex) { 311 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 312 } 313 314 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 315 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 317 318 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 319 A->spptr = 0; 320 PetscCall(MatDestroy_SeqAIJ(A)); 321 PetscFunctionReturn(PETSC_SUCCESS); 322 } 323 324 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 325 { 326 PetscFunctionBegin; 327 PetscCall(MatCreate_SeqAIJ(B)); 328 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 329 PetscFunctionReturn(PETSC_SUCCESS); 330 } 331 332 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 333 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) 334 { 335 Mat C; 336 337 PetscFunctionBegin; 338 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 339 C = *B; 340 341 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 342 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 343 344 C->spptr = new Mat_SeqAIJViennaCL(); 345 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 346 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 347 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 348 349 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 350 351 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 352 353 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 354 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 359 { 360 PetscFunctionBegin; 361 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 362 *array = ((Mat_SeqAIJ *)A->data)->a; 363 PetscFunctionReturn(PETSC_SUCCESS); 364 } 365 366 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 367 { 368 PetscFunctionBegin; 369 A->offloadmask = PETSC_OFFLOAD_CPU; 370 *array = NULL; 371 PetscFunctionReturn(PETSC_SUCCESS); 372 } 373 374 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 375 { 376 PetscFunctionBegin; 377 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 378 *array = ((Mat_SeqAIJ *)A->data)->a; 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 383 { 384 PetscFunctionBegin; 385 *array = NULL; 386 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 391 { 392 PetscFunctionBegin; 393 *array = ((Mat_SeqAIJ *)A->data)->a; 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 398 { 399 PetscFunctionBegin; 400 A->offloadmask = PETSC_OFFLOAD_CPU; 401 *array = NULL; 402 PetscFunctionReturn(PETSC_SUCCESS); 403 } 404 405 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) 406 { 407 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 408 409 PetscFunctionBegin; 410 A->boundtocpu = flg; 411 if (flg && a->inode.size_csr) { 412 a->inode.use = PETSC_TRUE; 413 } else { 414 a->inode.use = PETSC_FALSE; 415 } 416 if (flg) { 417 /* make sure we have an up-to-date copy on the CPU */ 418 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 419 A->ops->mult = MatMult_SeqAIJ; 420 A->ops->multadd = MatMultAdd_SeqAIJ; 421 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 422 A->ops->duplicate = MatDuplicate_SeqAIJ; 423 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 424 } else { 425 A->ops->mult = MatMult_SeqAIJViennaCL; 426 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 427 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 428 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 429 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 430 431 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 432 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 433 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 434 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 435 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 436 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 437 } 438 PetscFunctionReturn(PETSC_SUCCESS); 439 } 440 441 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 442 { 443 Mat B; 444 445 PetscFunctionBegin; 446 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 447 448 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 449 450 B = *newmat; 451 452 B->spptr = new Mat_SeqAIJViennaCL(); 453 454 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 455 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 456 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 457 458 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 459 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 460 461 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 462 PetscCall(PetscFree(B->defaultvectype)); 463 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 464 465 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 466 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 467 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 468 469 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 470 471 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 472 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 473 PetscFunctionReturn(PETSC_SUCCESS); 474 } 475 476 /*MC 477 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 478 479 A matrix type whose data resides on GPUs. These matrices are in CSR format by 480 default. All matrix calculations are performed using the ViennaCL library. 481 482 Options Database Keys: 483 + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 484 . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 485 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 486 487 Level: beginner 488 489 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 490 M*/ 491 492 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 493 { 494 PetscFunctionBegin; 495 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 496 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 497 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 498 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 499 PetscFunctionReturn(PETSC_SUCCESS); 500 } 501