/*
    Defines the basic matrix operations for the AIJ (compressed row)
  matrix storage format, implemented using the ViennaCL library.
*/

#include <petscconf.h>
#define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
#include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
#include <petscbt.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/vecimpl.h>

#include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>

#include <algorithm>
#include <vector>
#include <string>

#include "viennacl/linalg/prod.hpp"

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);

PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { // some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));

      try {
        if (a->compressedrow.use) {
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
          for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> row_indices;
          row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        } else {
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, A->rmap->n + 1);
          for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        }
        ViennaCLWaitForGPU();
      } catch (std::exception const &ex) {
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          delete (ViennaCLVector *)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
  PetscInt            m              = A->rmap->n;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
      a->nz           = Agpu->nnz();
      a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
      A->preallocated = PETSC_TRUE;
      if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)&a->a));
      if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)&a->j));
      if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)&a->i));
      PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a));
      PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j));
      PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i));
      a->free_a  = PETSC_TRUE;
      a->free_ij = PETSC_TRUE;

      /* Set up row lengths */
      PetscCall(PetscFree(a->imax));
      PetscCall(PetscFree(a->ilen));
      PetscCall(PetscMalloc1(m, &a->imax));
      PetscCall(PetscMalloc1(m, &a->ilen));

      /* Copy data back from GPU */
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
      row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

      // copy row array
      viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      (a->i)[0] = row_buffer[0];
      for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
        (a->i)[i + 1] = row_buffer[i + 1];
        a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; // set imax[] and ilen[] at the same time as i[] for better cache reuse
      }

      // copy column indices
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
      col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
      viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];

      // copy nonzero entries directly to destination (no conversion required)
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);

      PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);
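
    /* At this point the CPU sparsity pattern is already allocated and the device holds the
       up-to-date values, so only the numerical values need to be read back into a->a below. */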
    PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu           = NULL;
  ViennaCLVector       *ygpu           = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuTimeBegin());
    try {
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuTimeEnd());
    PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
  } else {
    PetscCall(VecSet_SeqViennaCL(yy, 0));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
  ViennaCLVector       *zgpu = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
      PetscCall(PetscLogGpuTimeBegin());
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      PetscCall(PetscLogGpuTimeEnd());
      PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuFlops(2.0 * a->nz));
  } else {
    PetscCall(VecCopy_SeqViennaCL(yy, zz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
  if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* --------------------------------------------------------------------------------*/
/*@C
  MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format.
  This matrix will ultimately be pushed down to GPUs and use the ViennaCL library for calculations.

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
- nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`

  Output Parameter:
. A - the matrix

  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  `MatXXXXSetPreallocation()` paradigm instead of calling this routine directly.
  [`MatXXXXSetPreallocation()` is, for example, `MatSeqAIJSetPreallocation()`]

  Notes:
  The AIJ format, also called compressed row storage, is fully compatible with standard Fortran
  storage. That is, the stored row and column indices can begin at either one (as in Fortran) or zero.

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory allocation.
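
  Example Usage:
  A minimal sketch of direct use; the sizes and the per-row nonzero estimate below are illustrative only:
.vb
  Mat A;

  PetscCall(MatCreateSeqAIJViennaCL(PETSC_COMM_SELF, 100, 100, 5, NULL, &A));
  PetscCall(MatSetValue(A, 0, 0, 2.0, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); /* assembly triggers the copy to the GPU */
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDestroy(&A));
.ve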

  Level: intermediate

.seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
@*/
PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;

  PetscFunctionBegin;
  try {
    if (viennaclcontainer) {
      delete viennaclcontainer->tempvec;
      delete viennaclcontainer->mat;
      delete viennaclcontainer->compressed_mat;
      delete viennaclcontainer;
    }
    A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));

  /* The next line is needed because MatDestroy tries to PetscFree spptr if it is not NULL, and PetscFree
     only works if the memory was allocated with PetscNew or PetscMalloc, which do not call the constructor */
  A->spptr = NULL;
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscFunctionBegin;
  PetscCall(MatCreate_SeqAIJ(B));
  PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
{
  Mat C;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
  C = *B;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  C->spptr                                         = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

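/* Read-only access: the values are synchronized to the CPU first, but the device copy stays
   valid, so (unlike the read-write accessor above) the restore below leaves the offload mask alone. */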
static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = NULL;
  /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  A->boundtocpu = flg;
  if (flg && a->inode.size_csr) {
    a->inode.use = PETSC_TRUE;
  } else {
    a->inode.use = PETSC_FALSE;
  }
  if (flg) {
    /* make sure we have an up-to-date copy on the CPU */
    PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
    A->ops->mult        = MatMult_SeqAIJ;
    A->ops->multadd     = MatMultAdd_SeqAIJ;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
    A->ops->duplicate   = MatDuplicate_SeqAIJ;
    PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
  } else {
    A->ops->mult        = MatMult_SeqAIJViennaCL;
    A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
    A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
    A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;

    a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
    a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
    a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
    a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
    a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
    a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat B;

  PetscFunctionBegin;
  PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));

  B = *newmat;

  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.

   A matrix type whose data resides on GPUs. These matrices are in CSR format by
   default. All matrix calculations are performed using the ViennaCL library.

   Options Database Keys:
+  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()`
.  -mat_viennacl_storage_format csr - sets the storage format of matrices during a call to `MatSetFromOptions()`
-  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices used only by `MatMult()` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
M*/

PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
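
/* Example (illustrative): a typical command line exercising the matrix type and the PETSc
   factorizations registered above, assuming the application calls MatSetFromOptions();
   the executable name is a placeholder:

     ./app -mat_type aijviennacl -vec_type viennacl -pc_type ilu
*/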