1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 17 #include <algorithm> 18 #include <vector> 19 #include <string> 20 21 #include "viennacl/linalg/prod.hpp" 22 23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 25 26 27 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28 { 29 30 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32 PetscErrorCode ierr; 33 34 PetscFunctionBegin; 35 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 37 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38 39 try { 40 if (a->compressedrow.use) { 41 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42 43 // Since PetscInt is different from cl_uint, we have to convert: 44 viennacl::backend::mem_handle dummy; 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48 row_buffer.set(i, (a->compressedrow.i)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52 row_indices.set(i, (a->compressedrow.rindex)[i]); 53 54 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55 for (PetscInt i=0; i<a->nz; ++i) 56 col_buffer.set(i, (a->j)[i]); 57 58 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59 ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 60 } else { 61 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62 63 // Since PetscInt is in general different from cl_uint, we have to convert: 64 viennacl::backend::mem_handle dummy; 65 66 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67 for (PetscInt i=0; i<=A->rmap->n; ++i) 68 row_buffer.set(i, (a->i)[i]); 69 70 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71 for (PetscInt i=0; i<a->nz; ++i) 72 col_buffer.set(i, (a->j)[i]); 73 74 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75 ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 76 } 77 ViennaCLWaitForGPU(); 78 } catch(std::exception const & ex) { 79 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 80 } 81 82 // Create temporary vector for v += A*x: 83 if (viennaclstruct->tempvec) { 84 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 85 delete (ViennaCLVector*)viennaclstruct->tempvec; 86 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 87 } else { 88 viennaclstruct->tempvec->clear(); 89 } 90 } else { 91 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 92 } 93 94 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 95 96 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 97 } 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103 { 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105 PetscInt m = A->rmap->n; 106 PetscErrorCode ierr; 107 108 109 PetscFunctionBegin; 110 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 111 try { 112 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 113 else { 114 115 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 116 a->nz = Agpu->nnz(); 117 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 118 A->preallocated = PETSC_TRUE; 119 if (a->singlemalloc) { 120 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 121 } else { 122 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 123 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 124 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 125 } 126 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 127 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 a->singlemalloc = PETSC_TRUE; 130 131 /* Setup row lengths */ 132 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 133 ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 134 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 135 136 /* Copy data back from GPU */ 137 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 138 139 // copy row array 140 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 141 (a->i)[0] = row_buffer[0]; 142 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 143 (a->i)[i+1] = row_buffer[i+1]; 144 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 145 } 146 147 // copy column indices 148 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 149 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 150 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 151 (a->j)[i] = col_buffer[i]; 152 153 // copy nonzero entries directly to destination (no conversion required) 154 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 155 156 ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 157 ViennaCLWaitForGPU(); 158 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 159 } 160 } catch(std::exception const & ex) { 161 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 162 } 163 164 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 165 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167 168 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 169 } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 170 PetscFunctionReturn(0); 171 } 172 173 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 174 { 175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 176 PetscErrorCode ierr; 177 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 178 const ViennaCLVector *xgpu=NULL; 179 ViennaCLVector *ygpu=NULL; 180 181 PetscFunctionBegin; 182 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 183 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 184 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 185 try { 186 if (a->compressedrow.use) { 187 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 188 } else { 189 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 190 } 191 ViennaCLWaitForGPU(); 192 } catch (std::exception const & ex) { 193 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 194 } 195 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 196 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 197 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 198 } else { 199 ierr = VecSet(yy,0);CHKERRQ(ierr); 200 } 201 PetscFunctionReturn(0); 202 } 203 204 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 205 { 206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207 PetscErrorCode ierr; 208 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 209 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 210 ViennaCLVector *zgpu=NULL; 211 212 PetscFunctionBegin; 213 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 214 try { 215 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 216 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 217 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 218 219 if (a->compressedrow.use) { 220 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 221 *zgpu = *ygpu + temp; 222 ViennaCLWaitForGPU(); 223 } else { 224 if (zz == xx || zz == yy) { //temporary required 225 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 226 *zgpu = *ygpu; 227 *zgpu += temp; 228 ViennaCLWaitForGPU(); 229 } else { 230 *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 231 *zgpu = *ygpu + *viennaclstruct->tempvec; 232 ViennaCLWaitForGPU(); 233 } 234 } 235 236 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 237 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 238 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 239 240 } catch(std::exception const & ex) { 241 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 242 } 243 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 244 } else { 245 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 246 } 247 PetscFunctionReturn(0); 248 } 249 250 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 251 { 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 256 if (!A->pinnedtocpu) { 257 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 258 } 259 PetscFunctionReturn(0); 260 } 261 262 /* --------------------------------------------------------------------------------*/ 263 /*@ 264 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 265 (the default parallel PETSc format). This matrix will ultimately be pushed down 266 to GPUs and use the ViennaCL library for calculations. For good matrix 267 assembly performance the user should preallocate the matrix storage by setting 268 the parameter nz (or the array nnz). By setting these parameters accurately, 269 performance during matrix assembly can be increased substantially. 270 271 272 Collective 273 274 Input Parameters: 275 + comm - MPI communicator, set to PETSC_COMM_SELF 276 . m - number of rows 277 . n - number of columns 278 . nz - number of nonzeros per row (same for all rows) 279 - nnz - array containing the number of nonzeros in the various rows 280 (possibly different for each row) or NULL 281 282 Output Parameter: 283 . A - the matrix 284 285 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 286 MatXXXXSetPreallocation() paradigm instead of this routine directly. 287 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 288 289 Notes: 290 If nnz is given then nz is ignored 291 292 The AIJ format (also called the Yale sparse matrix format or 293 compressed row storage), is fully compatible with standard Fortran 77 294 storage. That is, the stored row and column indices can begin at 295 either one (as in Fortran) or zero. See the users' manual for details. 296 297 Specify the preallocated storage with either nz or nnz (not both). 298 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 299 allocation. For large problems you MUST preallocate memory or you 300 will get TERRIBLE performance, see the users' manual chapter on matrices. 301 302 Level: intermediate 303 304 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 305 306 @*/ 307 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 308 { 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 ierr = MatCreate(comm,A);CHKERRQ(ierr); 313 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 314 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 315 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 319 320 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 321 { 322 PetscErrorCode ierr; 323 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 324 325 PetscFunctionBegin; 326 try { 327 if (viennaclcontainer) { 328 delete viennaclcontainer->tempvec; 329 delete viennaclcontainer->mat; 330 delete viennaclcontainer->compressed_mat; 331 delete viennaclcontainer; 332 } 333 A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 334 } catch(std::exception const & ex) { 335 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 336 } 337 338 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 339 340 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 341 A->spptr = 0; 342 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 347 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 348 { 349 PetscErrorCode ierr; 350 351 PetscFunctionBegin; 352 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 353 ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 354 PetscFunctionReturn(0); 355 } 356 357 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 358 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 359 { 360 PetscErrorCode ierr; 361 Mat C; 362 363 PetscFunctionBegin; 364 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 365 C = *B; 366 367 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 368 C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 369 370 C->spptr = new Mat_SeqAIJViennaCL(); 371 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 372 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 373 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 374 375 ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 376 377 C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 378 379 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 380 if (C->assembled) { 381 ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 382 } 383 384 PetscFunctionReturn(0); 385 } 386 387 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 388 { 389 PetscFunctionBegin; 390 A->pinnedtocpu = flg; 391 if (flg) { 392 A->ops->mult = MatMult_SeqAIJ; 393 A->ops->multadd = MatMultAdd_SeqAIJ; 394 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 395 A->ops->destroy = MatDestroy_SeqAIJ; 396 A->ops->duplicate = MatDuplicate_SeqAIJ; 397 } else { 398 A->ops->mult = MatMult_SeqAIJViennaCL; 399 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 400 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 401 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 402 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 403 } 404 PetscFunctionReturn(0); 405 } 406 407 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 408 { 409 PetscErrorCode ierr; 410 Mat B; 411 Mat_SeqAIJ *aij; 412 413 PetscFunctionBegin; 414 415 if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 416 417 if (reuse == MAT_INITIAL_MATRIX) { 418 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 419 } 420 421 B = *newmat; 422 423 aij = (Mat_SeqAIJ*)B->data; 424 aij->inode.use = PETSC_FALSE; 425 426 B->spptr = new Mat_SeqAIJViennaCL(); 427 428 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 429 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 430 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 431 432 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 433 A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 434 435 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 436 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 437 ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 438 439 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 440 441 B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 442 443 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 444 if (B->assembled) { 445 ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 446 } 447 448 PetscFunctionReturn(0); 449 } 450 451 452 /*MC 453 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 454 455 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 456 default. All matrix calculations are performed using the ViennaCL library. 457 458 Options Database Keys: 459 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 460 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 461 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 462 463 Level: beginner 464 465 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 466 M*/ 467 468 469 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 470 { 471 PetscErrorCode ierr; 472 473 PetscFunctionBegin; 474 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 475 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 476 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 477 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 478 PetscFunctionReturn(0); 479 } 480