1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) { 27 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29 30 PetscFunctionBegin; 31 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 33 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34 35 try { 36 if (a->compressedrow.use) { 37 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38 39 // Since PetscInt is different from cl_uint, we have to convert: 40 viennacl::backend::mem_handle dummy; 41 42 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43 row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44 for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47 row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51 col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53 54 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 55 PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56 } else { 57 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58 59 // Since PetscInt is in general different from cl_uint, we have to convert: 60 viennacl::backend::mem_handle dummy; 61 62 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63 row_buffer.raw_resize(dummy, A->rmap->n + 1); 64 for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65 66 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67 col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69 70 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 71 PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72 } 73 ViennaCLWaitForGPU(); 74 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 75 76 // Create temporary vector for v += A*x: 77 if (viennaclstruct->tempvec) { 78 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 79 delete (ViennaCLVector *)viennaclstruct->tempvec; 80 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 81 } else { 82 viennaclstruct->tempvec->clear(); 83 } 84 } else { 85 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86 } 87 88 A->offloadmask = PETSC_OFFLOAD_BOTH; 89 90 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 91 } 92 } 93 PetscFunctionReturn(0); 94 } 95 96 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) { 97 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 98 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 99 PetscInt m = A->rmap->n; 100 101 PetscFunctionBegin; 102 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 103 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 104 try { 105 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 106 PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 107 a->nz = Agpu->nnz(); 108 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 109 A->preallocated = PETSC_TRUE; 110 if (a->singlemalloc) { 111 if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 112 } else { 113 if (a->i) PetscCall(PetscFree(a->i)); 114 if (a->j) PetscCall(PetscFree(a->j)); 115 if (a->a) PetscCall(PetscFree(a->a)); 116 } 117 PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 118 PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + (m + 1) * sizeof(PetscInt))); 119 120 a->singlemalloc = PETSC_TRUE; 121 122 /* Setup row lengths */ 123 PetscCall(PetscFree(a->imax)); 124 PetscCall(PetscFree(a->ilen)); 125 PetscCall(PetscMalloc1(m, &a->imax)); 126 PetscCall(PetscMalloc1(m, &a->ilen)); 127 PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * m * sizeof(PetscInt))); 128 129 /* Copy data back from GPU */ 130 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 131 row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 132 133 // copy row array 134 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 135 (a->i)[0] = row_buffer[0]; 136 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 137 (a->i)[i + 1] = row_buffer[i + 1]; 138 a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 139 } 140 141 // copy column indices 142 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 143 col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 144 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 145 for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 146 147 // copy nonzero entries directly to destination (no conversion required) 148 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 149 150 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 151 ViennaCLWaitForGPU(); 152 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 153 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 154 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 155 PetscFunctionReturn(0); 156 } else { 157 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 158 159 PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 160 if (!Agpu) { 161 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 162 } else { 163 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 164 } 165 } 166 A->offloadmask = PETSC_OFFLOAD_BOTH; 167 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 168 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 169 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 170 PetscFunctionReturn(0); 171 } 172 173 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) { 174 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 175 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 176 const ViennaCLVector *xgpu = NULL; 177 ViennaCLVector *ygpu = NULL; 178 179 PetscFunctionBegin; 180 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 181 PetscCall(MatViennaCLCopyToGPU(A)); 182 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 183 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 184 PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 185 PetscCall(PetscLogGpuTimeBegin()); 186 try { 187 if (a->compressedrow.use) { 188 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 189 } else { 190 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 191 } 192 ViennaCLWaitForGPU(); 193 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 194 PetscCall(PetscLogGpuTimeEnd()); 195 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 196 PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 197 PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 198 } else { 199 PetscCall(VecSet_SeqViennaCL(yy, 0)); 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) { 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 206 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 207 const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 208 ViennaCLVector *zgpu = NULL; 209 210 PetscFunctionBegin; 211 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 212 PetscCall(MatViennaCLCopyToGPU(A)); 213 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 214 try { 215 PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 216 PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 217 PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 218 PetscCall(PetscLogGpuTimeBegin()); 219 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 220 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 221 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 222 else *zgpu += *viennaclstruct->tempvec; 223 ViennaCLWaitForGPU(); 224 PetscCall(PetscLogGpuTimeEnd()); 225 PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 226 PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 227 PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 228 229 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 230 PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 231 } else { 232 PetscCall(VecCopy_SeqViennaCL(yy, zz)); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) { 238 PetscFunctionBegin; 239 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 240 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 241 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 242 PetscFunctionReturn(0); 243 } 244 245 /* --------------------------------------------------------------------------------*/ 246 /*@C 247 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 248 (the default parallel PETSc format). This matrix will ultimately be pushed down 249 to GPUs and use the ViennaCL library for calculations. For good matrix 250 assembly performance the user should preallocate the matrix storage by setting 251 the parameter nz (or the array nnz). By setting these parameters accurately, 252 performance during matrix assembly can be increased substantially. 253 254 Collective 255 256 Input Parameters: 257 + comm - MPI communicator, set to PETSC_COMM_SELF 258 . m - number of rows 259 . n - number of columns 260 . nz - number of nonzeros per row (same for all rows) 261 - nnz - array containing the number of nonzeros in the various rows 262 (possibly different for each row) or NULL 263 264 Output Parameter: 265 . A - the matrix 266 267 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 268 MatXXXXSetPreallocation() paradigm instead of this routine directly. 269 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 270 271 Notes: 272 If nnz is given then nz is ignored 273 274 The AIJ format (also called the Yale sparse matrix format or 275 compressed row storage), is fully compatible with standard Fortran 77 276 storage. That is, the stored row and column indices can begin at 277 either one (as in Fortran) or zero. See the users' manual for details. 278 279 Specify the preallocated storage with either nz or nnz (not both). 280 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 281 allocation. For large problems you MUST preallocate memory or you 282 will get TERRIBLE performance, see the users' manual chapter on matrices. 283 284 Level: intermediate 285 286 .seealso: `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 287 288 @*/ 289 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 290 PetscFunctionBegin; 291 PetscCall(MatCreate(comm, A)); 292 PetscCall(MatSetSizes(*A, m, n, m, n)); 293 PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 294 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) { 299 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 300 301 PetscFunctionBegin; 302 try { 303 if (viennaclcontainer) { 304 delete viennaclcontainer->tempvec; 305 delete viennaclcontainer->mat; 306 delete viennaclcontainer->compressed_mat; 307 delete viennaclcontainer; 308 } 309 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 310 } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 311 312 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 313 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 314 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 315 316 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 317 A->spptr = 0; 318 PetscCall(MatDestroy_SeqAIJ(A)); 319 PetscFunctionReturn(0); 320 } 321 322 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) { 323 PetscFunctionBegin; 324 PetscCall(MatCreate_SeqAIJ(B)); 325 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 326 PetscFunctionReturn(0); 327 } 328 329 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 330 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) { 331 Mat C; 332 333 PetscFunctionBegin; 334 PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 335 C = *B; 336 337 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 338 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 339 340 C->spptr = new Mat_SeqAIJViennaCL(); 341 ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 342 ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 343 ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 344 345 PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 346 347 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 348 349 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 350 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 351 PetscFunctionReturn(0); 352 } 353 354 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 355 PetscFunctionBegin; 356 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 357 *array = ((Mat_SeqAIJ *)A->data)->a; 358 PetscFunctionReturn(0); 359 } 360 361 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 362 PetscFunctionBegin; 363 A->offloadmask = PETSC_OFFLOAD_CPU; 364 *array = NULL; 365 PetscFunctionReturn(0); 366 } 367 368 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 369 PetscFunctionBegin; 370 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 371 *array = ((Mat_SeqAIJ *)A->data)->a; 372 PetscFunctionReturn(0); 373 } 374 375 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 376 PetscFunctionBegin; 377 *array = NULL; 378 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 379 PetscFunctionReturn(0); 380 } 381 382 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 383 PetscFunctionBegin; 384 *array = ((Mat_SeqAIJ *)A->data)->a; 385 PetscFunctionReturn(0); 386 } 387 388 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 389 PetscFunctionBegin; 390 A->offloadmask = PETSC_OFFLOAD_CPU; 391 *array = NULL; 392 PetscFunctionReturn(0); 393 } 394 395 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) { 396 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 397 398 PetscFunctionBegin; 399 A->boundtocpu = flg; 400 if (flg && a->inode.size) { 401 a->inode.use = PETSC_TRUE; 402 } else { 403 a->inode.use = PETSC_FALSE; 404 } 405 if (flg) { 406 /* make sure we have an up-to-date copy on the CPU */ 407 PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 408 A->ops->mult = MatMult_SeqAIJ; 409 A->ops->multadd = MatMultAdd_SeqAIJ; 410 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 411 A->ops->duplicate = MatDuplicate_SeqAIJ; 412 PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 413 } else { 414 A->ops->mult = MatMult_SeqAIJViennaCL; 415 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 416 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 417 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 418 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 419 420 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 421 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 422 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 423 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 424 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 425 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 426 } 427 PetscFunctionReturn(0); 428 } 429 430 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 431 Mat B; 432 433 PetscFunctionBegin; 434 PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 435 436 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 437 438 B = *newmat; 439 440 B->spptr = new Mat_SeqAIJViennaCL(); 441 442 ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 443 ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 444 ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 445 446 PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 447 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 448 449 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 450 PetscCall(PetscFree(B->defaultvectype)); 451 PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 452 453 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 454 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 455 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 456 457 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 458 459 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 460 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 461 PetscFunctionReturn(0); 462 } 463 464 /*MC 465 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 466 467 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 468 default. All matrix calculations are performed using the ViennaCL library. 469 470 Options Database Keys: 471 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 472 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 473 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 474 475 Level: beginner 476 477 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 478 M*/ 479 480 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) { 481 PetscFunctionBegin; 482 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 483 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 484 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 485 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 486 PetscFunctionReturn(0); 487 } 488