1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 27 { 28 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30 31 PetscFunctionBegin; 32 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 33 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 34 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0)); 35 36 try { 37 if (a->compressedrow.use) { 38 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 39 40 // Since PetscInt is different from cl_uint, we have to convert: 41 viennacl::backend::mem_handle dummy; 42 43 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 44 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 45 row_buffer.set(i, (a->compressedrow.i)[i]); 46 47 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 49 row_indices.set(i, (a->compressedrow.rindex)[i]); 50 51 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i=0; i<a->nz; ++i) 53 col_buffer.set(i, (a->j)[i]); 54 55 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 56 PetscCall(PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar))); 57 } else { 58 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 59 60 // Since PetscInt is in general different from cl_uint, we have to convert: 61 viennacl::backend::mem_handle dummy; 62 63 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 64 for (PetscInt i=0; i<=A->rmap->n; ++i) 65 row_buffer.set(i, (a->i)[i]); 66 67 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i=0; i<a->nz; ++i) 69 col_buffer.set(i, (a->j)[i]); 70 71 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 72 PetscCall(PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar))); 73 } 74 ViennaCLWaitForGPU(); 75 } catch(std::exception const & ex) { 76 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 77 } 78 79 // Create temporary vector for v += A*x: 80 if (viennaclstruct->tempvec) { 81 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 82 delete (ViennaCLVector*)viennaclstruct->tempvec; 83 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 84 } else { 85 viennaclstruct->tempvec->clear(); 86 } 87 } else { 88 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 89 } 90 91 A->offloadmask = PETSC_OFFLOAD_BOTH; 92 93 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0)); 94 } 95 } 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 100 { 101 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 103 PetscInt m = A->rmap->n; 104 105 PetscFunctionBegin; 106 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 107 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 108 try { 109 PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 110 PetscCheck((PetscInt)Agpu->size1() == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 111 a->nz = Agpu->nnz(); 112 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 113 A->preallocated = PETSC_TRUE; 114 if (a->singlemalloc) { 115 if (a->a) PetscCall(PetscFree3(a->a,a->j,a->i)); 116 } else { 117 if (a->i) PetscCall(PetscFree(a->i)); 118 if (a->j) PetscCall(PetscFree(a->j)); 119 if (a->a) PetscCall(PetscFree(a->a)); 120 } 121 PetscCall(PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i)); 122 PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt))); 123 124 a->singlemalloc = PETSC_TRUE; 125 126 /* Setup row lengths */ 127 PetscCall(PetscFree(a->imax)); 128 PetscCall(PetscFree(a->ilen)); 129 PetscCall(PetscMalloc1(m,&a->imax)); 130 PetscCall(PetscMalloc1(m,&a->ilen)); 131 PetscCall(PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt))); 132 133 /* Copy data back from GPU */ 134 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 135 136 // copy row array 137 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 138 (a->i)[0] = row_buffer[0]; 139 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 140 (a->i)[i+1] = row_buffer[i+1]; 141 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 142 } 143 144 // copy column indices 145 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 146 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 147 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 148 (a->j)[i] = col_buffer[i]; 149 150 // copy nonzero entries directly to destination (no conversion required) 151 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 152 153 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)))); 154 ViennaCLWaitForGPU(); 155 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 156 } catch(std::exception const & ex) { 157 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 158 } 159 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 160 PetscFunctionReturn(0); 161 } else { 162 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 163 164 PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 165 if (!Agpu) { 166 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a); 167 } else { 168 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 169 } 170 } 171 A->offloadmask = PETSC_OFFLOAD_BOTH; 172 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 173 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 174 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 175 PetscFunctionReturn(0); 176 } 177 178 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 179 { 180 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 181 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 182 const ViennaCLVector *xgpu=NULL; 183 ViennaCLVector *ygpu=NULL; 184 185 PetscFunctionBegin; 186 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 187 PetscCall(MatViennaCLCopyToGPU(A)); 188 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 189 PetscCall(VecViennaCLGetArrayRead(xx,&xgpu)); 190 PetscCall(VecViennaCLGetArrayWrite(yy,&ygpu)); 191 PetscCall(PetscLogGpuTimeBegin()); 192 try { 193 if (a->compressedrow.use) { 194 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 195 } else { 196 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 197 } 198 ViennaCLWaitForGPU(); 199 } catch (std::exception const & ex) { 200 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 201 } 202 PetscCall(PetscLogGpuTimeEnd()); 203 PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu)); 204 PetscCall(VecViennaCLRestoreArrayWrite(yy,&ygpu)); 205 PetscCall(PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt)); 206 } else { 207 PetscCall(VecSet_SeqViennaCL(yy,0)); 208 } 209 PetscFunctionReturn(0); 210 } 211 212 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 213 { 214 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 215 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 216 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 217 ViennaCLVector *zgpu=NULL; 218 219 PetscFunctionBegin; 220 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 221 PetscCall(MatViennaCLCopyToGPU(A)); 222 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 223 try { 224 PetscCall(VecViennaCLGetArrayRead(xx,&xgpu)); 225 PetscCall(VecViennaCLGetArrayRead(yy,&ygpu)); 226 PetscCall(VecViennaCLGetArrayWrite(zz,&zgpu)); 227 PetscCall(PetscLogGpuTimeBegin()); 228 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 229 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 230 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 231 else *zgpu += *viennaclstruct->tempvec; 232 ViennaCLWaitForGPU(); 233 PetscCall(PetscLogGpuTimeEnd()); 234 PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu)); 235 PetscCall(VecViennaCLRestoreArrayRead(yy,&ygpu)); 236 PetscCall(VecViennaCLRestoreArrayWrite(zz,&zgpu)); 237 238 } catch(std::exception const & ex) { 239 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 240 } 241 PetscCall(PetscLogGpuFlops(2.0*a->nz)); 242 } else { 243 PetscCall(VecCopy_SeqViennaCL(yy,zz)); 244 } 245 PetscFunctionReturn(0); 246 } 247 248 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 249 { 250 PetscFunctionBegin; 251 PetscCall(MatAssemblyEnd_SeqAIJ(A,mode)); 252 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 253 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 254 PetscFunctionReturn(0); 255 } 256 257 /* --------------------------------------------------------------------------------*/ 258 /*@C 259 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 260 (the default parallel PETSc format). This matrix will ultimately be pushed down 261 to GPUs and use the ViennaCL library for calculations. For good matrix 262 assembly performance the user should preallocate the matrix storage by setting 263 the parameter nz (or the array nnz). By setting these parameters accurately, 264 performance during matrix assembly can be increased substantially. 265 266 Collective 267 268 Input Parameters: 269 + comm - MPI communicator, set to PETSC_COMM_SELF 270 . m - number of rows 271 . n - number of columns 272 . nz - number of nonzeros per row (same for all rows) 273 - nnz - array containing the number of nonzeros in the various rows 274 (possibly different for each row) or NULL 275 276 Output Parameter: 277 . A - the matrix 278 279 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 280 MatXXXXSetPreallocation() paradigm instead of this routine directly. 281 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 282 283 Notes: 284 If nnz is given then nz is ignored 285 286 The AIJ format (also called the Yale sparse matrix format or 287 compressed row storage), is fully compatible with standard Fortran 77 288 storage. That is, the stored row and column indices can begin at 289 either one (as in Fortran) or zero. See the users' manual for details. 290 291 Specify the preallocated storage with either nz or nnz (not both). 292 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 293 allocation. For large problems you MUST preallocate memory or you 294 will get TERRIBLE performance, see the users' manual chapter on matrices. 295 296 Level: intermediate 297 298 .seealso: `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 299 300 @*/ 301 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 302 { 303 PetscFunctionBegin; 304 PetscCall(MatCreate(comm,A)); 305 PetscCall(MatSetSizes(*A,m,n,m,n)); 306 PetscCall(MatSetType(*A,MATSEQAIJVIENNACL)); 307 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz)); 308 PetscFunctionReturn(0); 309 } 310 311 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 312 { 313 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 314 315 PetscFunctionBegin; 316 try { 317 if (viennaclcontainer) { 318 delete viennaclcontainer->tempvec; 319 delete viennaclcontainer->mat; 320 delete viennaclcontainer->compressed_mat; 321 delete viennaclcontainer; 322 } 323 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 324 } catch(std::exception const & ex) { 325 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 326 } 327 328 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL)); 329 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL)); 330 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL)); 331 332 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 333 A->spptr = 0; 334 PetscCall(MatDestroy_SeqAIJ(A)); 335 PetscFunctionReturn(0); 336 } 337 338 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 339 { 340 PetscFunctionBegin; 341 PetscCall(MatCreate_SeqAIJ(B)); 342 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B)); 343 PetscFunctionReturn(0); 344 } 345 346 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool); 347 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 348 { 349 Mat C; 350 351 PetscFunctionBegin; 352 PetscCall(MatDuplicate_SeqAIJ(A,cpvalues,B)); 353 C = *B; 354 355 PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE)); 356 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 357 358 C->spptr = new Mat_SeqAIJViennaCL(); 359 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 360 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 361 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 362 363 PetscCall(PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL)); 364 365 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 366 367 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 368 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 369 PetscFunctionReturn(0); 370 } 371 372 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 373 { 374 PetscFunctionBegin; 375 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 376 *array = ((Mat_SeqAIJ*)A->data)->a; 377 PetscFunctionReturn(0); 378 } 379 380 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 381 { 382 PetscFunctionBegin; 383 A->offloadmask = PETSC_OFFLOAD_CPU; 384 *array = NULL; 385 PetscFunctionReturn(0); 386 } 387 388 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 389 { 390 PetscFunctionBegin; 391 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 392 *array = ((Mat_SeqAIJ*)A->data)->a; 393 PetscFunctionReturn(0); 394 } 395 396 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 397 { 398 PetscFunctionBegin; 399 *array = NULL; 400 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 401 PetscFunctionReturn(0); 402 } 403 404 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 405 { 406 PetscFunctionBegin; 407 *array = ((Mat_SeqAIJ*)A->data)->a; 408 PetscFunctionReturn(0); 409 } 410 411 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 412 { 413 PetscFunctionBegin; 414 A->offloadmask = PETSC_OFFLOAD_CPU; 415 *array = NULL; 416 PetscFunctionReturn(0); 417 } 418 419 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 420 { 421 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 422 423 PetscFunctionBegin; 424 A->boundtocpu = flg; 425 if (flg && a->inode.size) { 426 a->inode.use = PETSC_TRUE; 427 } else { 428 a->inode.use = PETSC_FALSE; 429 } 430 if (flg) { 431 /* make sure we have an up-to-date copy on the CPU */ 432 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 433 A->ops->mult = MatMult_SeqAIJ; 434 A->ops->multadd = MatMultAdd_SeqAIJ; 435 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 436 A->ops->duplicate = MatDuplicate_SeqAIJ; 437 PetscCall(PetscMemzero(a->ops,sizeof(Mat_SeqAIJOps))); 438 } else { 439 A->ops->mult = MatMult_SeqAIJViennaCL; 440 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 441 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 442 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 443 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 444 445 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 446 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 447 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 448 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 449 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 450 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 451 } 452 PetscFunctionReturn(0); 453 } 454 455 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 456 { 457 Mat B; 458 459 PetscFunctionBegin; 460 PetscCheck(reuse != MAT_REUSE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 461 462 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,newmat)); 463 464 B = *newmat; 465 466 B->spptr = new Mat_SeqAIJViennaCL(); 467 468 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 469 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 470 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 471 472 PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE)); 473 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 474 475 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL)); 476 PetscCall(PetscFree(B->defaultvectype)); 477 PetscCall(PetscStrallocpy(VECVIENNACL,&B->defaultvectype)); 478 479 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL)); 480 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense)); 481 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ)); 482 483 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 484 485 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 486 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 487 PetscFunctionReturn(0); 488 } 489 490 /*MC 491 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 492 493 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 494 default. All matrix calculations are performed using the ViennaCL library. 495 496 Options Database Keys: 497 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 498 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 499 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 500 501 Level: beginner 502 503 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 504 M*/ 505 506 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 507 { 508 PetscFunctionBegin; 509 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc)); 510 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc)); 511 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc)); 512 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc)); 513 PetscFunctionReturn(0); 514 } 515