1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 17 #include <algorithm> 18 #include <vector> 19 #include <string> 20 21 #include "viennacl/linalg/prod.hpp" 22 23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 25 26 27 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28 { 29 30 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32 PetscErrorCode ierr; 33 34 PetscFunctionBegin; 35 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 37 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38 39 try { 40 if (a->compressedrow.use) { 41 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42 43 // Since PetscInt is different from cl_uint, we have to convert: 44 viennacl::backend::mem_handle dummy; 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48 row_buffer.set(i, (a->compressedrow.i)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52 row_indices.set(i, (a->compressedrow.rindex)[i]); 53 54 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55 for (PetscInt i=0; i<a->nz; ++i) 56 col_buffer.set(i, (a->j)[i]); 57 58 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59 ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 60 } else { 61 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62 63 // Since PetscInt is in general different from cl_uint, we have to convert: 64 viennacl::backend::mem_handle dummy; 65 66 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67 for (PetscInt i=0; i<=A->rmap->n; ++i) 68 row_buffer.set(i, (a->i)[i]); 69 70 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71 for (PetscInt i=0; i<a->nz; ++i) 72 col_buffer.set(i, (a->j)[i]); 73 74 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75 ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 76 } 77 ViennaCLWaitForGPU(); 78 } catch(std::exception const & ex) { 79 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 80 } 81 82 // Create temporary vector for v += A*x: 83 if (viennaclstruct->tempvec) { 84 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 85 delete (ViennaCLVector*)viennaclstruct->tempvec; 86 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 87 } else { 88 viennaclstruct->tempvec->clear(); 89 } 90 } else { 91 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 92 } 93 94 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 95 96 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 97 } 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103 { 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105 PetscInt m = A->rmap->n; 106 PetscErrorCode ierr; 107 108 109 PetscFunctionBegin; 110 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 111 try { 112 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 113 else { 114 115 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 116 a->nz = Agpu->nnz(); 117 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 118 A->preallocated = PETSC_TRUE; 119 if (a->singlemalloc) { 120 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 121 } else { 122 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 123 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 124 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 125 } 126 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 127 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 a->singlemalloc = PETSC_TRUE; 130 131 /* Setup row lengths */ 132 ierr = PetscFree(a->imax);CHKERRQ(ierr); 133 ierr = PetscFree(a->ilen);CHKERRQ(ierr); 134 ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr); 135 ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr); 136 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 137 138 /* Copy data back from GPU */ 139 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 140 141 // copy row array 142 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 143 (a->i)[0] = row_buffer[0]; 144 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 145 (a->i)[i+1] = row_buffer[i+1]; 146 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 147 } 148 149 // copy column indices 150 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 151 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 152 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 153 (a->j)[i] = col_buffer[i]; 154 155 // copy nonzero entries directly to destination (no conversion required) 156 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 157 158 ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 159 ViennaCLWaitForGPU(); 160 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 161 } 162 } catch(std::exception const & ex) { 163 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 164 } 165 166 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 167 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 169 170 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 171 } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 172 PetscFunctionReturn(0); 173 } 174 175 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 176 { 177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 178 PetscErrorCode ierr; 179 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 180 const ViennaCLVector *xgpu=NULL; 181 ViennaCLVector *ygpu=NULL; 182 183 PetscFunctionBegin; 184 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 185 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 186 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 187 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 188 try { 189 if (a->compressedrow.use) { 190 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 191 } else { 192 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 193 } 194 ViennaCLWaitForGPU(); 195 } catch (std::exception const & ex) { 196 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 197 } 198 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 199 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 200 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 201 ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 202 } else { 203 ierr = VecSet(yy,0);CHKERRQ(ierr); 204 } 205 PetscFunctionReturn(0); 206 } 207 208 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 209 { 210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 211 PetscErrorCode ierr; 212 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 213 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 214 ViennaCLVector *zgpu=NULL; 215 216 PetscFunctionBegin; 217 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 218 try { 219 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 220 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 221 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 222 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 223 if (a->compressedrow.use) { 224 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 225 *zgpu = *ygpu + temp; 226 ViennaCLWaitForGPU(); 227 } else { 228 if (zz == xx || zz == yy) { //temporary required 229 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 230 *zgpu = *ygpu; 231 *zgpu += temp; 232 ViennaCLWaitForGPU(); 233 } else { 234 *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 235 *zgpu = *ygpu + *viennaclstruct->tempvec; 236 ViennaCLWaitForGPU(); 237 } 238 } 239 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 240 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 241 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 242 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 243 244 } catch(std::exception const & ex) { 245 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 246 } 247 ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr); 248 } else { 249 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 250 } 251 PetscFunctionReturn(0); 252 } 253 254 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 255 { 256 PetscErrorCode ierr; 257 258 PetscFunctionBegin; 259 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 260 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 261 if (!A->pinnedtocpu) { 262 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 263 } 264 PetscFunctionReturn(0); 265 } 266 267 /* --------------------------------------------------------------------------------*/ 268 /*@ 269 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 270 (the default parallel PETSc format). This matrix will ultimately be pushed down 271 to GPUs and use the ViennaCL library for calculations. For good matrix 272 assembly performance the user should preallocate the matrix storage by setting 273 the parameter nz (or the array nnz). By setting these parameters accurately, 274 performance during matrix assembly can be increased substantially. 275 276 277 Collective 278 279 Input Parameters: 280 + comm - MPI communicator, set to PETSC_COMM_SELF 281 . m - number of rows 282 . n - number of columns 283 . nz - number of nonzeros per row (same for all rows) 284 - nnz - array containing the number of nonzeros in the various rows 285 (possibly different for each row) or NULL 286 287 Output Parameter: 288 . A - the matrix 289 290 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 291 MatXXXXSetPreallocation() paradigm instead of this routine directly. 292 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 293 294 Notes: 295 If nnz is given then nz is ignored 296 297 The AIJ format (also called the Yale sparse matrix format or 298 compressed row storage), is fully compatible with standard Fortran 77 299 storage. That is, the stored row and column indices can begin at 300 either one (as in Fortran) or zero. See the users' manual for details. 301 302 Specify the preallocated storage with either nz or nnz (not both). 303 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 304 allocation. For large problems you MUST preallocate memory or you 305 will get TERRIBLE performance, see the users' manual chapter on matrices. 306 307 Level: intermediate 308 309 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 310 311 @*/ 312 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 313 { 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 ierr = MatCreate(comm,A);CHKERRQ(ierr); 318 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 319 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 320 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 325 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 326 { 327 PetscErrorCode ierr; 328 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 329 330 PetscFunctionBegin; 331 try { 332 if (viennaclcontainer) { 333 delete viennaclcontainer->tempvec; 334 delete viennaclcontainer->mat; 335 delete viennaclcontainer->compressed_mat; 336 delete viennaclcontainer; 337 } 338 A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 339 } catch(std::exception const & ex) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 341 } 342 343 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 344 345 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 346 A->spptr = 0; 347 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 352 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 353 { 354 PetscErrorCode ierr; 355 356 PetscFunctionBegin; 357 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 358 ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 359 PetscFunctionReturn(0); 360 } 361 362 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 363 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 364 { 365 PetscErrorCode ierr; 366 Mat C; 367 368 PetscFunctionBegin; 369 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 370 C = *B; 371 372 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 373 C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 374 375 C->spptr = new Mat_SeqAIJViennaCL(); 376 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 377 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 378 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 379 380 ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 381 382 C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 383 384 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 385 if (C->assembled) { 386 ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 387 } 388 389 PetscFunctionReturn(0); 390 } 391 392 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 393 { 394 PetscFunctionBegin; 395 A->pinnedtocpu = flg; 396 if (flg) { 397 A->ops->mult = MatMult_SeqAIJ; 398 A->ops->multadd = MatMultAdd_SeqAIJ; 399 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 400 A->ops->duplicate = MatDuplicate_SeqAIJ; 401 } else { 402 A->ops->mult = MatMult_SeqAIJViennaCL; 403 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 404 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 405 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 406 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 407 } 408 PetscFunctionReturn(0); 409 } 410 411 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 412 { 413 PetscErrorCode ierr; 414 Mat B; 415 Mat_SeqAIJ *aij; 416 417 PetscFunctionBegin; 418 419 if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 420 421 if (reuse == MAT_INITIAL_MATRIX) { 422 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 423 } 424 425 B = *newmat; 426 427 aij = (Mat_SeqAIJ*)B->data; 428 aij->inode.use = PETSC_FALSE; 429 430 B->spptr = new Mat_SeqAIJViennaCL(); 431 432 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 433 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 434 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 435 436 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 437 A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 438 439 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 440 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 441 ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 442 443 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 444 445 B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 446 447 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 448 if (B->assembled) { 449 ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 450 } 451 452 PetscFunctionReturn(0); 453 } 454 455 456 /*MC 457 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 458 459 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 460 default. All matrix calculations are performed using the ViennaCL library. 461 462 Options Database Keys: 463 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 464 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 465 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 466 467 Level: beginner 468 469 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 470 M*/ 471 472 473 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 474 { 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 479 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 480 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 481 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484