1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 17 #include <algorithm> 18 #include <vector> 19 #include <string> 20 21 #include "viennacl/linalg/prod.hpp" 22 23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 25 26 27 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28 { 29 30 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32 PetscErrorCode ierr; 33 34 PetscFunctionBegin; 35 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 37 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38 39 try { 40 if (a->compressedrow.use) { 41 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42 43 // Since PetscInt is different from cl_uint, we have to convert: 44 viennacl::backend::mem_handle dummy; 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48 row_buffer.set(i, (a->compressedrow.i)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52 row_indices.set(i, (a->compressedrow.rindex)[i]); 53 54 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55 for (PetscInt i=0; i<a->nz; ++i) 56 col_buffer.set(i, (a->j)[i]); 57 58 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59 ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 60 } else { 61 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62 63 // Since PetscInt is in general different from cl_uint, we have to convert: 64 viennacl::backend::mem_handle dummy; 65 66 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67 for (PetscInt i=0; i<=A->rmap->n; ++i) 68 row_buffer.set(i, (a->i)[i]); 69 70 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71 for (PetscInt i=0; i<a->nz; ++i) 72 col_buffer.set(i, (a->j)[i]); 73 74 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75 ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 76 } 77 ViennaCLWaitForGPU(); 78 } catch(std::exception const & ex) { 79 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 80 } 81 82 // Create temporary vector for v += A*x: 83 if (viennaclstruct->tempvec) { 84 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 85 delete (ViennaCLVector*)viennaclstruct->tempvec; 86 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 87 } else { 88 viennaclstruct->tempvec->clear(); 89 } 90 } else { 91 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 92 } 93 94 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 95 96 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 97 } 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103 { 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105 PetscInt m = A->rmap->n; 106 PetscErrorCode ierr; 107 108 109 PetscFunctionBegin; 110 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 111 try { 112 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 113 else { 114 115 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 116 a->nz = Agpu->nnz(); 117 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 118 A->preallocated = PETSC_TRUE; 119 if (a->singlemalloc) { 120 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 121 } else { 122 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 123 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 124 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 125 } 126 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 127 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 a->singlemalloc = PETSC_TRUE; 130 131 /* Setup row lengths */ 132 ierr = PetscFree(a->imax);CHKERRQ(ierr); 133 ierr = PetscFree(a->ilen);CHKERRQ(ierr); 134 ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr); 135 ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr); 136 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 137 138 /* Copy data back from GPU */ 139 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 140 141 // copy row array 142 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 143 (a->i)[0] = row_buffer[0]; 144 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 145 (a->i)[i+1] = row_buffer[i+1]; 146 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 147 } 148 149 // copy column indices 150 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 151 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 152 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 153 (a->j)[i] = col_buffer[i]; 154 155 // copy nonzero entries directly to destination (no conversion required) 156 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 157 158 ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 159 ViennaCLWaitForGPU(); 160 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 161 } 162 } catch(std::exception const & ex) { 163 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 164 } 165 166 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 167 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 169 170 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 171 } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 172 PetscFunctionReturn(0); 173 } 174 175 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 176 { 177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 178 PetscErrorCode ierr; 179 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 180 const ViennaCLVector *xgpu=NULL; 181 ViennaCLVector *ygpu=NULL; 182 183 PetscFunctionBegin; 184 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 185 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 186 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 187 try { 188 if (a->compressedrow.use) { 189 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 190 } else { 191 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 192 } 193 ViennaCLWaitForGPU(); 194 } catch (std::exception const & ex) { 195 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 196 } 197 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 198 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 199 ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 200 } else { 201 ierr = VecSet(yy,0);CHKERRQ(ierr); 202 } 203 PetscFunctionReturn(0); 204 } 205 206 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 207 { 208 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 209 PetscErrorCode ierr; 210 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 211 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 212 ViennaCLVector *zgpu=NULL; 213 214 PetscFunctionBegin; 215 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 216 try { 217 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 218 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 219 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 220 221 if (a->compressedrow.use) { 222 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 223 *zgpu = *ygpu + temp; 224 ViennaCLWaitForGPU(); 225 } else { 226 if (zz == xx || zz == yy) { //temporary required 227 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 228 *zgpu = *ygpu; 229 *zgpu += temp; 230 ViennaCLWaitForGPU(); 231 } else { 232 *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 233 *zgpu = *ygpu + *viennaclstruct->tempvec; 234 ViennaCLWaitForGPU(); 235 } 236 } 237 238 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 239 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 240 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 241 242 } catch(std::exception const & ex) { 243 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 244 } 245 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 246 } else { 247 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 248 } 249 PetscFunctionReturn(0); 250 } 251 252 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 253 { 254 PetscErrorCode ierr; 255 256 PetscFunctionBegin; 257 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 258 if (!A->pinnedtocpu) { 259 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 260 } 261 PetscFunctionReturn(0); 262 } 263 264 /* --------------------------------------------------------------------------------*/ 265 /*@ 266 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 267 (the default parallel PETSc format). This matrix will ultimately be pushed down 268 to GPUs and use the ViennaCL library for calculations. For good matrix 269 assembly performance the user should preallocate the matrix storage by setting 270 the parameter nz (or the array nnz). By setting these parameters accurately, 271 performance during matrix assembly can be increased substantially. 272 273 274 Collective 275 276 Input Parameters: 277 + comm - MPI communicator, set to PETSC_COMM_SELF 278 . m - number of rows 279 . n - number of columns 280 . nz - number of nonzeros per row (same for all rows) 281 - nnz - array containing the number of nonzeros in the various rows 282 (possibly different for each row) or NULL 283 284 Output Parameter: 285 . A - the matrix 286 287 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 288 MatXXXXSetPreallocation() paradigm instead of this routine directly. 289 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 290 291 Notes: 292 If nnz is given then nz is ignored 293 294 The AIJ format (also called the Yale sparse matrix format or 295 compressed row storage), is fully compatible with standard Fortran 77 296 storage. That is, the stored row and column indices can begin at 297 either one (as in Fortran) or zero. See the users' manual for details. 298 299 Specify the preallocated storage with either nz or nnz (not both). 300 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 301 allocation. For large problems you MUST preallocate memory or you 302 will get TERRIBLE performance, see the users' manual chapter on matrices. 303 304 Level: intermediate 305 306 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 307 308 @*/ 309 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 310 { 311 PetscErrorCode ierr; 312 313 PetscFunctionBegin; 314 ierr = MatCreate(comm,A);CHKERRQ(ierr); 315 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 316 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 317 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 322 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 323 { 324 PetscErrorCode ierr; 325 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 326 327 PetscFunctionBegin; 328 try { 329 if (viennaclcontainer) { 330 delete viennaclcontainer->tempvec; 331 delete viennaclcontainer->mat; 332 delete viennaclcontainer->compressed_mat; 333 delete viennaclcontainer; 334 } 335 A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 336 } catch(std::exception const & ex) { 337 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 338 } 339 340 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 341 342 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 343 A->spptr = 0; 344 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 349 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 355 ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 356 PetscFunctionReturn(0); 357 } 358 359 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 360 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 361 { 362 PetscErrorCode ierr; 363 Mat C; 364 365 PetscFunctionBegin; 366 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 367 C = *B; 368 369 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 370 C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 371 372 C->spptr = new Mat_SeqAIJViennaCL(); 373 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 374 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 375 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 376 377 ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 378 379 C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 380 381 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 382 if (C->assembled) { 383 ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 384 } 385 386 PetscFunctionReturn(0); 387 } 388 389 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 390 { 391 PetscFunctionBegin; 392 A->pinnedtocpu = flg; 393 if (flg) { 394 A->ops->mult = MatMult_SeqAIJ; 395 A->ops->multadd = MatMultAdd_SeqAIJ; 396 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 397 A->ops->duplicate = MatDuplicate_SeqAIJ; 398 } else { 399 A->ops->mult = MatMult_SeqAIJViennaCL; 400 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 401 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 402 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 403 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 404 } 405 PetscFunctionReturn(0); 406 } 407 408 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 409 { 410 PetscErrorCode ierr; 411 Mat B; 412 Mat_SeqAIJ *aij; 413 414 PetscFunctionBegin; 415 416 if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 417 418 if (reuse == MAT_INITIAL_MATRIX) { 419 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 420 } 421 422 B = *newmat; 423 424 aij = (Mat_SeqAIJ*)B->data; 425 aij->inode.use = PETSC_FALSE; 426 427 B->spptr = new Mat_SeqAIJViennaCL(); 428 429 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 430 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 431 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 432 433 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 434 A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 435 436 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 437 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 438 ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 439 440 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 441 442 B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 443 444 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 445 if (B->assembled) { 446 ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 447 } 448 449 PetscFunctionReturn(0); 450 } 451 452 453 /*MC 454 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 455 456 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 457 default. All matrix calculations are performed using the ViennaCL library. 458 459 Options Database Keys: 460 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 461 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 462 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 463 464 Level: beginner 465 466 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 467 M*/ 468 469 470 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 471 { 472 PetscErrorCode ierr; 473 474 PetscFunctionBegin; 475 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 476 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 477 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 478 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481