1 /* 2 Defines the basic matrix operations for the AIJ (compressed row) 3 matrix storage format. 4 */ 5 6 #include <petscconf.h> 7 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 9 #include <petscbt.h> 10 #include <../src/vec/vec/impls/dvecimpl.h> 11 #include <petsc/private/vecimpl.h> 12 13 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 14 15 #include <algorithm> 16 #include <vector> 17 #include <string> 18 19 #include "viennacl/linalg/prod.hpp" 20 21 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 22 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 23 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 24 25 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26 { 27 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29 30 PetscFunctionBegin; 31 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 33 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34 35 try { 36 if (a->compressedrow.use) { 37 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38 39 // Since PetscInt is different from cl_uint, we have to convert: 40 viennacl::backend::mem_handle dummy; 41 42 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47 row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51 col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53 54 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 55 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56 } else { 57 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58 59 // Since PetscInt is in general different from cl_uint, we have to convert: 60 viennacl::backend::mem_handle dummy; 61 62 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63 row_buffer.raw_resize(dummy, A->rmap->n + 1); 64 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65 66 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67 col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69 70 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 71 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72 } 73 ViennaCLWaitForGPU(); 74 } catch (std::exception const &ex) { 75 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 76 } 77 78 // Create temporary vector for v += A*x: 79 if (viennaclstruct->tempvec) { 80 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 81 delete (ViennaCLVector *)viennaclstruct->tempvec; 82 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 83 } else { 84 viennaclstruct->tempvec->clear(); 85 } 86 } else { 87 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 88 } 89 90 A->offloadmask = PETSC_OFFLOAD_BOTH; 91 92 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 93 } 94 } 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 99 { 100 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 101 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 102 PetscInt m = A->rmap->n; 103 104 PetscFunctionBegin; 105 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS); 106 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 107 try { 108 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 109 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 110 a->nz = Agpu->nnz(); 111 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 112 A->preallocated = PETSC_TRUE; 113 if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)a->a)); 114 if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->j)); 115 if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->i)); 116 PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a)); 117 PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j)); 118 PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i)); 119 a->free_a = PETSC_TRUE; 120 a->free_ij = PETSC_TRUE; 121 122 /* Setup row lengths */ 123 PetscCall(PetscFree(a->imax)); 124 PetscCall(PetscFree(a->ilen)); 125 PetscCall(PetscMalloc1(m, &a->imax)); 126 PetscCall(PetscMalloc1(m, &a->ilen)); 127 128 /* Copy data back from GPU */ 129 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 130 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 131 132 // copy row array 133 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 134 (a->i)[0] = row_buffer[0]; 135 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 136 (a->i)[i + 1] = row_buffer[i + 1]; 137 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 138 } 139 140 // copy column indices 141 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 142 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 143 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 144 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 145 146 // copy nonzero entries directly to destination (no conversion required) 147 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 148 149 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 150 ViennaCLWaitForGPU(); 151 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 152 } catch (std::exception const &ex) { 153 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 154 } 155 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } else { 158 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS); 159 160 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 161 if (!Agpu) { 162 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 163 } else { 164 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 165 } 166 } 167 A->offloadmask = PETSC_OFFLOAD_BOTH; 168 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 169 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 170 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 171 PetscFunctionReturn(PETSC_SUCCESS); 172 } 173 174 static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) 175 { 176 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 177 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 178 const ViennaCLVector *xgpu = NULL; 179 ViennaCLVector *ygpu = NULL; 180 181 PetscFunctionBegin; 182 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 183 PetscCall(MatViennaCLCopyToGPU(A)); 184 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 185 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 186 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 187 PetscCall(PetscLogGpuTimeBegin()); 188 try { 189 if (a->compressedrow.use) { 190 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 191 } else { 192 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 193 } 194 ViennaCLWaitForGPU(); 195 } catch (std::exception const &ex) { 196 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 197 } 198 PetscCall(PetscLogGpuTimeEnd()); 199 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 200 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 201 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 202 } else { 203 PetscCall(VecSet_SeqViennaCL(yy, 0)); 204 } 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) 209 { 210 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 211 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 212 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 213 ViennaCLVector *zgpu = NULL; 214 215 PetscFunctionBegin; 216 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 217 PetscCall(MatViennaCLCopyToGPU(A)); 218 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 219 try { 220 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 221 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 222 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 223 PetscCall(PetscLogGpuTimeBegin()); 224 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 225 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 226 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 227 else *zgpu += *viennaclstruct->tempvec; 228 ViennaCLWaitForGPU(); 229 PetscCall(PetscLogGpuTimeEnd()); 230 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 231 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 232 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 233 234 } catch (std::exception const &ex) { 235 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 236 } 237 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 238 } else { 239 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 240 } 241 PetscFunctionReturn(PETSC_SUCCESS); 242 } 243 244 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) 245 { 246 PetscFunctionBegin; 247 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 248 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 249 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 250 PetscFunctionReturn(PETSC_SUCCESS); 251 } 252 253 /* --------------------------------------------------------------------------------*/ 254 /*@C 255 MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 256 (the default parallel PETSc format). This matrix will ultimately be pushed down 257 to GPUs and use the ViennaCL library for calculations. 258 259 Collective 260 261 Input Parameters: 262 + comm - MPI communicator, set to `PETSC_COMM_SELF` 263 . m - number of rows 264 . n - number of columns 265 . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is set 266 - nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL` 267 268 Output Parameter: 269 . A - the matrix 270 271 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 272 MatXXXXSetPreallocation() paradigm instead of this routine directly. 273 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 274 275 Notes: 276 The AIJ format, also called 277 compressed row storage, is fully compatible with standard Fortran 278 storage. That is, the stored row and column indices can begin at 279 either one (as in Fortran) or zero. 280 281 Specify the preallocated storage with either `nz` or `nnz` (not both). 282 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 283 allocation. 284 285 Level: intermediate 286 287 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()` 288 @*/ 289 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 290 { 291 PetscFunctionBegin; 292 PetscCall(MatCreate(comm, A)); 293 PetscCall(MatSetSizes(*A, m, n, m, n)); 294 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 295 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 296 PetscFunctionReturn(PETSC_SUCCESS); 297 } 298 299 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 300 { 301 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 302 303 PetscFunctionBegin; 304 try { 305 if (viennaclcontainer) { 306 delete viennaclcontainer->tempvec; 307 delete viennaclcontainer->mat; 308 delete viennaclcontainer->compressed_mat; 309 delete viennaclcontainer; 310 } 311 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 312 } catch (std::exception const &ex) { 313 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 314 } 315 316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 317 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 318 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 319 320 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 321 A->spptr = 0; 322 PetscCall(MatDestroy_SeqAIJ(A)); 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 327 { 328 PetscFunctionBegin; 329 PetscCall(MatCreate_SeqAIJ(B)); 330 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 331 PetscFunctionReturn(PETSC_SUCCESS); 332 } 333 334 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 335 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) 336 { 337 Mat C; 338 339 PetscFunctionBegin; 340 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 341 C = *B; 342 343 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 344 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 345 346 C->spptr = new Mat_SeqAIJViennaCL(); 347 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 348 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 349 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 350 351 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 352 353 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 354 355 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 356 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 361 { 362 PetscFunctionBegin; 363 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 364 *array = ((Mat_SeqAIJ *)A->data)->a; 365 PetscFunctionReturn(PETSC_SUCCESS); 366 } 367 368 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 369 { 370 PetscFunctionBegin; 371 A->offloadmask = PETSC_OFFLOAD_CPU; 372 *array = NULL; 373 PetscFunctionReturn(PETSC_SUCCESS); 374 } 375 376 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 377 { 378 PetscFunctionBegin; 379 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 380 *array = ((Mat_SeqAIJ *)A->data)->a; 381 PetscFunctionReturn(PETSC_SUCCESS); 382 } 383 384 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 385 { 386 PetscFunctionBegin; 387 *array = NULL; 388 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 393 { 394 PetscFunctionBegin; 395 *array = ((Mat_SeqAIJ *)A->data)->a; 396 PetscFunctionReturn(PETSC_SUCCESS); 397 } 398 399 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 400 { 401 PetscFunctionBegin; 402 A->offloadmask = PETSC_OFFLOAD_CPU; 403 *array = NULL; 404 PetscFunctionReturn(PETSC_SUCCESS); 405 } 406 407 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) 408 { 409 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 410 411 PetscFunctionBegin; 412 A->boundtocpu = flg; 413 if (flg && a->inode.size) { 414 a->inode.use = PETSC_TRUE; 415 } else { 416 a->inode.use = PETSC_FALSE; 417 } 418 if (flg) { 419 /* make sure we have an up-to-date copy on the CPU */ 420 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 421 A->ops->mult = MatMult_SeqAIJ; 422 A->ops->multadd = MatMultAdd_SeqAIJ; 423 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 424 A->ops->duplicate = MatDuplicate_SeqAIJ; 425 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 426 } else { 427 A->ops->mult = MatMult_SeqAIJViennaCL; 428 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 429 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 430 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 431 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 432 433 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 434 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 435 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 436 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 437 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 438 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 439 } 440 PetscFunctionReturn(PETSC_SUCCESS); 441 } 442 443 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 444 { 445 Mat B; 446 447 PetscFunctionBegin; 448 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 449 450 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 451 452 B = *newmat; 453 454 B->spptr = new Mat_SeqAIJViennaCL(); 455 456 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 457 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 458 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 459 460 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 461 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 462 463 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 464 PetscCall(PetscFree(B->defaultvectype)); 465 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 466 467 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 468 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 469 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 470 471 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 472 473 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 474 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 475 PetscFunctionReturn(PETSC_SUCCESS); 476 } 477 478 /*MC 479 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 480 481 A matrix type whose data resides on GPUs. These matrices are in CSR format by 482 default. All matrix calculations are performed using the ViennaCL library. 483 484 Options Database Keys: 485 + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 486 . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 487 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 488 489 Level: beginner 490 491 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 492 M*/ 493 494 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 495 { 496 PetscFunctionBegin; 497 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 498 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 499 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 500 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503