/*
 Defines the basic matrix operations for the AIJ (compressed row)
 matrix storage format.
*/

#include <petscconf.h>
#define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
#include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
#include <petscbt.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/vecimpl.h>

#include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>

#include <algorithm>
#include <vector>
#include <string>

#include "viennacl/linalg/prod.hpp"

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);

PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { // some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));

      try {
        if (a->compressedrow.use) {
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
          for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> row_indices;
          row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        } else {
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, A->rmap->n + 1);
          for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        }
        ViennaCLWaitForGPU();
      } catch (std::exception const &ex) {
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          delete (ViennaCLVector *)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
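
/*
  A minimal sketch of the index conversion performed in MatViennaCLCopyToGPU(),
  using illustrative CSR data for a 2x3 matrix (the values are hypothetical):

     a->i = {0, 2, 3}       row pointers
     a->j = {0, 2, 1}       column indices
     a->a = {1.0, 2.0, 3.0} nonzero values

  PetscInt may be a 64-bit type while ViennaCL's OpenCL backend indexes with
  cl_uint, so each index is copied entry-by-entry through a
  viennacl::backend::typesafe_host_array<unsigned int> before the buffers are
  uploaded with viennaclstruct->mat->set(...). The scalar array a->a requires
  no conversion and is passed through directly.
*/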

PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
  PetscInt            m              = A->rmap->n;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
      a->nz           = Agpu->nnz();
      a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
      A->preallocated = PETSC_TRUE;
      if (a->singlemalloc) {
        if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i));
      } else {
        if (a->i) PetscCall(PetscFree(a->i));
        if (a->j) PetscCall(PetscFree(a->j));
        if (a->a) PetscCall(PetscFree(a->a));
      }
      PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i));

      a->singlemalloc = PETSC_TRUE;

      /* Setup row lengths */
      PetscCall(PetscFree(a->imax));
      PetscCall(PetscFree(a->ilen));
      PetscCall(PetscMalloc1(m, &a->imax));
      PetscCall(PetscMalloc1(m, &a->ilen));

      /* Copy data back from GPU */
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
      row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

      // copy row array
      viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      (a->i)[0] = row_buffer[0];
      for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
        (a->i)[i + 1] = row_buffer[i + 1];
        a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; // Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
      }

      // copy column indices
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
      col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
      viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];

      // copy nonzero entries directly to destination (no conversion required)
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);

      PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
      ViennaCLWaitForGPU();
      /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);

    PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
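
/*
  Sketch of the row-length recovery in the loop above, using the same
  illustrative row pointers as before: for a->i = {0, 2, 3}, row 0 has
  a->i[1] - a->i[0] = 2 entries and row 1 has a->i[2] - a->i[1] = 1 entry,
  so imax[] = ilen[] = {2, 1}. Both arrays are filled in the same pass that
  writes i[] itself, so each cache line is touched only once.
*/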

static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu           = NULL;
  ViennaCLVector       *ygpu           = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuTimeBegin());
    try {
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuTimeEnd());
    PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
  } else {
    PetscCall(VecSet_SeqViennaCL(yy, 0));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
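
/*
  Note on the flop count logged above: a CSR matrix-vector product performs one
  multiplication per stored nonzero and, for a row with k nonzeros, k - 1
  additions. Summed over the nonzerorowcnt nonempty rows this gives
  nz + (nz - nonzerorowcnt) = 2*nz - nonzerorowcnt flops.
*/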

static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
  ViennaCLVector       *zgpu = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
      PetscCall(PetscLogGpuTimeBegin());
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      PetscCall(PetscLogGpuTimeEnd());
      PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuFlops(2.0 * a->nz));
  } else {
    PetscCall(VecCopy_SeqViennaCL(yy, zz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
  if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* --------------------------------------------------------------------------------*/
/*@C
  MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
  (the default sequential PETSc format). This matrix will ultimately be pushed down
  to GPUs and use the ViennaCL library for calculations.

  Collective

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
- nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`

  Output Parameter:
. A - the matrix

  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  Notes:
  The AIJ format, also called compressed row storage, is fully compatible with
  standard Fortran storage. That is, the stored row and column indices can begin
  at either one (as in Fortran) or zero.

  Specify the preallocated storage with either `nz` or `nnz` (not both).
  Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
  allocation.

  Level: intermediate

.seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
@*/
PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
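
/*
  A minimal usage sketch for the constructor above (the sizes and the
  preallocation of 3 nonzeros per row are illustrative only; assumes a PETSc
  build configured with ViennaCL):

     Mat A;
     PetscCall(MatCreateSeqAIJViennaCL(PETSC_COMM_SELF, 10, 10, 3, NULL, &A));
     // ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd();
     // subsequent MatMult() calls execute on the GPU via ViennaCL
     PetscCall(MatDestroy(&A));
*/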

static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;

  PetscFunctionBegin;
  try {
    if (viennaclcontainer) {
      delete viennaclcontainer->tempvec;
      delete viennaclcontainer->mat;
      delete viennaclcontainer->compressed_mat;
      delete viennaclcontainer;
    }
    A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));

  /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
  A->spptr = NULL;
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscFunctionBegin;
  PetscCall(MatCreate_SeqAIJ(B));
  PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
{
  Mat C;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
  C = *B;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  C->spptr                                         = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = NULL;
  /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = ((Mat_SeqAIJ *)A->data)->a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
{
  PetscFunctionBegin;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  *array         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  A->boundtocpu = flg;
  if (flg && a->inode.size) {
    a->inode.use = PETSC_TRUE;
  } else {
    a->inode.use = PETSC_FALSE;
  }
  if (flg) {
    /* make sure we have an up-to-date copy on the CPU */
    PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
    A->ops->mult        = MatMult_SeqAIJ;
    A->ops->multadd     = MatMultAdd_SeqAIJ;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
    A->ops->duplicate   = MatDuplicate_SeqAIJ;
    PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
  } else {
    A->ops->mult        = MatMult_SeqAIJViennaCL;
    A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
    A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
    A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;

    a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
    a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
    a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
    a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
    a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
    a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
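
/*
  MatBindToCPU_SeqAIJViennaCL() backs the generic MatBindToCPU() interface:
  binding swaps the operation tables back to the plain SeqAIJ kernels (after
  making sure the host copy is current), while unbinding installs the ViennaCL
  kernels. A caller-side sketch:

     PetscCall(MatBindToCPU(A, PETSC_TRUE));  // subsequent MatMult() runs on the CPU
     PetscCall(MatBindToCPU(A, PETSC_FALSE)); // back to the ViennaCL GPU kernels
*/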

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat B;

  PetscFunctionBegin;
  PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));

  B = *newmat;

  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;

  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
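
/*
  Conversion sketch: given the MAT_REUSE_MATRIX check above, an existing
  assembled MATSEQAIJ matrix is converted in place (MAT_INITIAL_MATRIX with a
  separate output matrix also works):

     PetscCall(MatConvert(A, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &A));
*/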

/*MC
  MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.

  A matrix type whose data resides on GPUs. These matrices are in CSR format by
  default. All matrix calculations are performed using the ViennaCL library.

  Options Database Keys:
+ -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()`.
. -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
- -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.

  Level: beginner

.seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
M*/

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
  PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
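
/*
  Note: the registrations above route MAT_FACTOR_LU/CHOLESKY/ILU/ICC requests
  for MATSEQAIJVIENNACL matrices to MatGetFactor_seqaij_petsc, i.e. (as far as
  this file is concerned) the factorization itself is computed by PETSc's own
  host-side SeqAIJ routines rather than by ViennaCL. A caller-side sketch:

     Mat F;
     PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &F));
*/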