1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) { 27 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29 30 PetscFunctionBegin; 31 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 33 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34 35 try { 36 if (a->compressedrow.use) { 37 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38 39 // Since PetscInt is different from cl_uint, we have to convert: 40 viennacl::backend::mem_handle dummy; 41 42 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47 row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51 col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53 54 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 55 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56 } else { 57 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58 59 // Since PetscInt is in general different from cl_uint, we have to convert: 60 viennacl::backend::mem_handle dummy; 61 62 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63 row_buffer.raw_resize(dummy, A->rmap->n + 1); 64 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65 66 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67 col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69 70 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 71 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72 } 73 ViennaCLWaitForGPU(); 74 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 75 76 // Create temporary vector for v += A*x: 77 if (viennaclstruct->tempvec) { 78 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 79 delete (ViennaCLVector *)viennaclstruct->tempvec; 80 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 81 } else { 82 viennaclstruct->tempvec->clear(); 83 } 84 } else { 85 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86 } 87 88 A->offloadmask = PETSC_OFFLOAD_BOTH; 89 90 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 91 } 92 } 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) { 97 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 98 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 99 PetscInt m = A->rmap->n; 100 101 PetscFunctionBegin; 102 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 103 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 104 try { 105 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 106 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 107 a->nz = Agpu->nnz(); 108 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 109 A->preallocated = PETSC_TRUE; 110 if (a->singlemalloc) { 111 if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 112 } else { 113 if (a->i) PetscCall(PetscFree(a->i)); 114 if (a->j) PetscCall(PetscFree(a->j)); 115 if (a->a) PetscCall(PetscFree(a->a)); 116 } 117 PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 118 PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + (m + 1) * sizeof(PetscInt))); 119 120 a->singlemalloc = PETSC_TRUE; 121 122 /* Setup row lengths */ 123 PetscCall(PetscFree(a->imax)); 124 PetscCall(PetscFree(a->ilen)); 125 PetscCall(PetscMalloc1(m, &a->imax)); 126 PetscCall(PetscMalloc1(m, &a->ilen)); 127 PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * m * sizeof(PetscInt))); 128 129 /* Copy data back from GPU */ 130 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 131 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 132 133 // copy row array 134 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 135 (a->i)[0] = row_buffer[0]; 136 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 137 (a->i)[i + 1] = row_buffer[i + 1]; 138 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 139 } 140 141 // copy column indices 142 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 143 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 144 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 145 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 146 147 // copy nonzero entries directly to destination (no conversion required) 148 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 149 150 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 151 ViennaCLWaitForGPU(); 152 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 153 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 154 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 155 PetscFunctionReturn(0); 156 } else { 157 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 158 159 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 160 if (!Agpu) { 161 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 162 } else { 163 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 164 } 165 } 166 A->offloadmask = PETSC_OFFLOAD_BOTH; 167 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 168 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 169 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 170 PetscFunctionReturn(0); 171 } 172 173 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) { 174 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 175 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 176 const ViennaCLVector *xgpu = NULL; 177 ViennaCLVector *ygpu = NULL; 178 179 PetscFunctionBegin; 180 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 181 PetscCall(MatViennaCLCopyToGPU(A)); 182 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 183 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 184 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 185 PetscCall(PetscLogGpuTimeBegin()); 186 try { 187 if (a->compressedrow.use) { 188 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 189 } else { 190 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 191 } 192 ViennaCLWaitForGPU(); 193 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 194 PetscCall(PetscLogGpuTimeEnd()); 195 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 196 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 197 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 198 } else { 199 PetscCall(VecSet_SeqViennaCL(yy, 0)); 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) { 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 206 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 207 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 208 ViennaCLVector *zgpu = NULL; 209 210 PetscFunctionBegin; 211 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 212 PetscCall(MatViennaCLCopyToGPU(A)); 213 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 214 try { 215 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 216 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 217 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 218 PetscCall(PetscLogGpuTimeBegin()); 219 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 220 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 221 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 222 else *zgpu += *viennaclstruct->tempvec; 223 ViennaCLWaitForGPU(); 224 PetscCall(PetscLogGpuTimeEnd()); 225 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 226 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 227 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 228 229 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 230 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 231 } else { 232 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) { 238 PetscFunctionBegin; 239 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 240 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 241 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 242 PetscFunctionReturn(0); 243 } 244 245 /* --------------------------------------------------------------------------------*/ 246 /*@C 247 MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 248 (the default parallel PETSc format). This matrix will ultimately be pushed down 249 to GPUs and use the ViennaCL library for calculations. For good matrix 250 assembly performance the user should preallocate the matrix storage by setting 251 the parameter nz (or the array nnz). By setting these parameters accurately, 252 performance during matrix assembly can be increased substantially. 253 254 Collective 255 256 Input Parameters: 257 + comm - MPI communicator, set to `PETSC_COMM_SELF` 258 . m - number of rows 259 . n - number of columns 260 . nz - number of nonzeros per row (same for all rows) 261 - nnz - array containing the number of nonzeros in the various rows 262 (possibly different for each row) or NULL 263 264 Output Parameter: 265 . A - the matrix 266 267 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 268 MatXXXXSetPreallocation() paradigm instead of this routine directly. 269 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 270 271 Notes: 272 If nnz is given then nz is ignored 273 274 The AIJ format, also called 275 compressed row storage, is fully compatible with standard Fortran 77 276 storage. That is, the stored row and column indices can begin at 277 either one (as in Fortran) or zero. See the users' manual for details. 278 279 Specify the preallocated storage with either nz or nnz (not both). 280 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 281 allocation. For large problems you MUST preallocate memory or you 282 will get TERRIBLE performance, see the users' manual chapter on matrices. 283 284 Level: intermediate 285 286 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 287 @*/ 288 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 289 PetscFunctionBegin; 290 PetscCall(MatCreate(comm, A)); 291 PetscCall(MatSetSizes(*A, m, n, m, n)); 292 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 293 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 294 PetscFunctionReturn(0); 295 } 296 297 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) { 298 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 299 300 PetscFunctionBegin; 301 try { 302 if (viennaclcontainer) { 303 delete viennaclcontainer->tempvec; 304 delete viennaclcontainer->mat; 305 delete viennaclcontainer->compressed_mat; 306 delete viennaclcontainer; 307 } 308 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 309 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 310 311 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 312 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 313 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 314 315 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 316 A->spptr = 0; 317 PetscCall(MatDestroy_SeqAIJ(A)); 318 PetscFunctionReturn(0); 319 } 320 321 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) { 322 PetscFunctionBegin; 323 PetscCall(MatCreate_SeqAIJ(B)); 324 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 325 PetscFunctionReturn(0); 326 } 327 328 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 329 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) { 330 Mat C; 331 332 PetscFunctionBegin; 333 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 334 C = *B; 335 336 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 337 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 338 339 C->spptr = new Mat_SeqAIJViennaCL(); 340 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 341 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 342 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 343 344 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 345 346 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 347 348 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 349 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 350 PetscFunctionReturn(0); 351 } 352 353 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 354 PetscFunctionBegin; 355 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 356 *array = ((Mat_SeqAIJ *)A->data)->a; 357 PetscFunctionReturn(0); 358 } 359 360 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 361 PetscFunctionBegin; 362 A->offloadmask = PETSC_OFFLOAD_CPU; 363 *array = NULL; 364 PetscFunctionReturn(0); 365 } 366 367 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 368 PetscFunctionBegin; 369 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 370 *array = ((Mat_SeqAIJ *)A->data)->a; 371 PetscFunctionReturn(0); 372 } 373 374 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 375 PetscFunctionBegin; 376 *array = NULL; 377 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 378 PetscFunctionReturn(0); 379 } 380 381 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 382 PetscFunctionBegin; 383 *array = ((Mat_SeqAIJ *)A->data)->a; 384 PetscFunctionReturn(0); 385 } 386 387 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 388 PetscFunctionBegin; 389 A->offloadmask = PETSC_OFFLOAD_CPU; 390 *array = NULL; 391 PetscFunctionReturn(0); 392 } 393 394 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) { 395 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 396 397 PetscFunctionBegin; 398 A->boundtocpu = flg; 399 if (flg && a->inode.size) { 400 a->inode.use = PETSC_TRUE; 401 } else { 402 a->inode.use = PETSC_FALSE; 403 } 404 if (flg) { 405 /* make sure we have an up-to-date copy on the CPU */ 406 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 407 A->ops->mult = MatMult_SeqAIJ; 408 A->ops->multadd = MatMultAdd_SeqAIJ; 409 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 410 A->ops->duplicate = MatDuplicate_SeqAIJ; 411 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 412 } else { 413 A->ops->mult = MatMult_SeqAIJViennaCL; 414 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 415 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 416 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 417 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 418 419 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 420 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 421 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 422 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 423 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 424 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 425 } 426 PetscFunctionReturn(0); 427 } 428 429 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 430 Mat B; 431 432 PetscFunctionBegin; 433 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 434 435 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 436 437 B = *newmat; 438 439 B->spptr = new Mat_SeqAIJViennaCL(); 440 441 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 442 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 443 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 444 445 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 446 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 447 448 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 449 PetscCall(PetscFree(B->defaultvectype)); 450 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 451 452 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 453 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 454 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 455 456 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 457 458 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 459 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 460 PetscFunctionReturn(0); 461 } 462 463 /*MC 464 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 465 466 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 467 default. All matrix calculations are performed using the ViennaCL library. 468 469 Options Database Keys: 470 + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 471 . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 472 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 473 474 Level: beginner 475 476 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 477 M*/ 478 479 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) { 480 PetscFunctionBegin; 481 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 482 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 483 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 484 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 485 PetscFunctionReturn(0); 486 } 487