1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 27 { 28 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 29 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 30 31 PetscFunctionBegin; 32 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 33 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 34 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 35 36 try { 37 if (a->compressedrow.use) { 38 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 39 40 // Since PetscInt is different from cl_uint, we have to convert: 41 viennacl::backend::mem_handle dummy; 42 43 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 44 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 45 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 46 47 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 48 row_indices.raw_resize(dummy, a->compressedrow.nrows); 49 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 50 51 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 52 col_buffer.raw_resize(dummy, a->nz); 53 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 54 55 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 56 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 57 } else { 58 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 59 60 // Since PetscInt is in general different from cl_uint, we have to convert: 61 viennacl::backend::mem_handle dummy; 62 63 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 64 row_buffer.raw_resize(dummy, A->rmap->n + 1); 65 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 66 67 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 68 col_buffer.raw_resize(dummy, a->nz); 69 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 70 71 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 72 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 73 } 74 ViennaCLWaitForGPU(); 75 } catch (std::exception const &ex) { 76 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 77 } 78 79 // Create temporary vector for v += A*x: 80 if (viennaclstruct->tempvec) { 81 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 82 delete (ViennaCLVector *)viennaclstruct->tempvec; 83 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 84 } else { 85 viennaclstruct->tempvec->clear(); 86 } 87 } else { 88 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 89 } 90 91 A->offloadmask = PETSC_OFFLOAD_BOTH; 92 93 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 94 } 95 } 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98 99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 100 { 101 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 102 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 103 PetscInt m = A->rmap->n; 104 105 PetscFunctionBegin; 106 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS); 107 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 108 try { 109 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 110 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 111 a->nz = Agpu->nnz(); 112 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 113 A->preallocated = PETSC_TRUE; 114 if (a->singlemalloc) { 115 if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 116 } else { 117 if (a->i) PetscCall(PetscFree(a->i)); 118 if (a->j) PetscCall(PetscFree(a->j)); 119 if (a->a) PetscCall(PetscFree(a->a)); 120 } 121 PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 122 123 a->singlemalloc = PETSC_TRUE; 124 125 /* Setup row lengths */ 126 PetscCall(PetscFree(a->imax)); 127 PetscCall(PetscFree(a->ilen)); 128 PetscCall(PetscMalloc1(m, &a->imax)); 129 PetscCall(PetscMalloc1(m, &a->ilen)); 130 131 /* Copy data back from GPU */ 132 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 133 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 134 135 // copy row array 136 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 137 (a->i)[0] = row_buffer[0]; 138 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 139 (a->i)[i + 1] = row_buffer[i + 1]; 140 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 141 } 142 143 // copy column indices 144 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 145 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 146 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 147 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 148 149 // copy nonzero entries directly to destination (no conversion required) 150 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 151 152 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 153 ViennaCLWaitForGPU(); 154 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 155 } catch (std::exception const &ex) { 156 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 157 } 158 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 159 PetscFunctionReturn(PETSC_SUCCESS); 160 } else { 161 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS); 162 163 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 164 if (!Agpu) { 165 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 166 } else { 167 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 168 } 169 } 170 A->offloadmask = PETSC_OFFLOAD_BOTH; 171 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 172 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 173 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) 178 { 179 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 180 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 181 const ViennaCLVector *xgpu = NULL; 182 ViennaCLVector *ygpu = NULL; 183 184 PetscFunctionBegin; 185 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 186 PetscCall(MatViennaCLCopyToGPU(A)); 187 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 188 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 189 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 190 PetscCall(PetscLogGpuTimeBegin()); 191 try { 192 if (a->compressedrow.use) { 193 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 194 } else { 195 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 196 } 197 ViennaCLWaitForGPU(); 198 } catch (std::exception const &ex) { 199 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 200 } 201 PetscCall(PetscLogGpuTimeEnd()); 202 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 203 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 204 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 205 } else { 206 PetscCall(VecSet_SeqViennaCL(yy, 0)); 207 } 208 PetscFunctionReturn(PETSC_SUCCESS); 209 } 210 211 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) 212 { 213 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 214 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 215 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 216 ViennaCLVector *zgpu = NULL; 217 218 PetscFunctionBegin; 219 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 220 PetscCall(MatViennaCLCopyToGPU(A)); 221 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 222 try { 223 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 224 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 225 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 226 PetscCall(PetscLogGpuTimeBegin()); 227 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 228 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 229 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 230 else *zgpu += *viennaclstruct->tempvec; 231 ViennaCLWaitForGPU(); 232 PetscCall(PetscLogGpuTimeEnd()); 233 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 234 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 235 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 236 237 } catch (std::exception const &ex) { 238 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 239 } 240 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 241 } else { 242 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 243 } 244 PetscFunctionReturn(PETSC_SUCCESS); 245 } 246 247 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) 248 { 249 PetscFunctionBegin; 250 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 251 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 252 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 /* --------------------------------------------------------------------------------*/ 257 /*@C 258 MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 259 (the default parallel PETSc format). This matrix will ultimately be pushed down 260 to GPUs and use the ViennaCL library for calculations. For good matrix 261 assembly performance the user should preallocate the matrix storage by setting 262 the parameter nz (or the array nnz). By setting these parameters accurately, 263 performance during matrix assembly can be increased substantially. 264 265 Collective 266 267 Input Parameters: 268 + comm - MPI communicator, set to `PETSC_COMM_SELF` 269 . m - number of rows 270 . n - number of columns 271 . nz - number of nonzeros per row (same for all rows) 272 - nnz - array containing the number of nonzeros in the various rows 273 (possibly different for each row) or NULL 274 275 Output Parameter: 276 . A - the matrix 277 278 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 279 MatXXXXSetPreallocation() paradigm instead of this routine directly. 280 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 281 282 Notes: 283 If nnz is given then nz is ignored 284 285 The AIJ format, also called 286 compressed row storage, is fully compatible with standard Fortran 287 storage. That is, the stored row and column indices can begin at 288 either one (as in Fortran) or zero. See the users' manual for details. 289 290 Specify the preallocated storage with either nz or nnz (not both). 291 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 292 allocation. For large problems you MUST preallocate memory or you 293 will get TERRIBLE performance, see the users' manual chapter on matrices. 294 295 Level: intermediate 296 297 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 298 @*/ 299 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 300 { 301 PetscFunctionBegin; 302 PetscCall(MatCreate(comm, A)); 303 PetscCall(MatSetSizes(*A, m, n, m, n)); 304 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 305 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 310 { 311 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 312 313 PetscFunctionBegin; 314 try { 315 if (viennaclcontainer) { 316 delete viennaclcontainer->tempvec; 317 delete viennaclcontainer->mat; 318 delete viennaclcontainer->compressed_mat; 319 delete viennaclcontainer; 320 } 321 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 322 } catch (std::exception const &ex) { 323 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 324 } 325 326 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 327 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 328 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 329 330 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 331 A->spptr = 0; 332 PetscCall(MatDestroy_SeqAIJ(A)); 333 PetscFunctionReturn(PETSC_SUCCESS); 334 } 335 336 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 337 { 338 PetscFunctionBegin; 339 PetscCall(MatCreate_SeqAIJ(B)); 340 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 345 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) 346 { 347 Mat C; 348 349 PetscFunctionBegin; 350 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 351 C = *B; 352 353 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 354 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 355 356 C->spptr = new Mat_SeqAIJViennaCL(); 357 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 358 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 359 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 360 361 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 362 363 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 364 365 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 366 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 367 PetscFunctionReturn(PETSC_SUCCESS); 368 } 369 370 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 371 { 372 PetscFunctionBegin; 373 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 374 *array = ((Mat_SeqAIJ *)A->data)->a; 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 379 { 380 PetscFunctionBegin; 381 A->offloadmask = PETSC_OFFLOAD_CPU; 382 *array = NULL; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 387 { 388 PetscFunctionBegin; 389 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 390 *array = ((Mat_SeqAIJ *)A->data)->a; 391 PetscFunctionReturn(PETSC_SUCCESS); 392 } 393 394 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 395 { 396 PetscFunctionBegin; 397 *array = NULL; 398 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 403 { 404 PetscFunctionBegin; 405 *array = ((Mat_SeqAIJ *)A->data)->a; 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 410 { 411 PetscFunctionBegin; 412 A->offloadmask = PETSC_OFFLOAD_CPU; 413 *array = NULL; 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) 418 { 419 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 420 421 PetscFunctionBegin; 422 A->boundtocpu = flg; 423 if (flg && a->inode.size) { 424 a->inode.use = PETSC_TRUE; 425 } else { 426 a->inode.use = PETSC_FALSE; 427 } 428 if (flg) { 429 /* make sure we have an up-to-date copy on the CPU */ 430 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 431 A->ops->mult = MatMult_SeqAIJ; 432 A->ops->multadd = MatMultAdd_SeqAIJ; 433 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 434 A->ops->duplicate = MatDuplicate_SeqAIJ; 435 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 436 } else { 437 A->ops->mult = MatMult_SeqAIJViennaCL; 438 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 439 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 440 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 441 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 442 443 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 444 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 445 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 446 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 447 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 448 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 449 } 450 PetscFunctionReturn(PETSC_SUCCESS); 451 } 452 453 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 454 { 455 Mat B; 456 457 PetscFunctionBegin; 458 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 459 460 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 461 462 B = *newmat; 463 464 B->spptr = new Mat_SeqAIJViennaCL(); 465 466 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 467 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 468 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 469 470 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 471 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 472 473 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 474 PetscCall(PetscFree(B->defaultvectype)); 475 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 476 477 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 478 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 479 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 480 481 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 482 483 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 484 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 /*MC 489 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 490 491 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 492 default. All matrix calculations are performed using the ViennaCL library. 493 494 Options Database Keys: 495 + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 496 . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 497 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 498 499 Level: beginner 500 501 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 502 M*/ 503 504 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 505 { 506 PetscFunctionBegin; 507 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 508 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 509 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 510 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 511 PetscFunctionReturn(PETSC_SUCCESS); 512 } 513