1 2 /* 3 Defines the basic matrix operations for the AIJ (compressed row) 4 matrix storage format. 5 */ 6 7 #include <petscconf.h> 8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 #include <algorithm> 17 #include <vector> 18 #include <string> 19 20 #include "viennacl/linalg/prod.hpp" 21 22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 25 26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 27 { 28 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30 31 PetscFunctionBegin; 32 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 33 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 34 PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0)); 35 36 try { 37 if (a->compressedrow.use) { 38 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 39 40 // Since PetscInt is different from cl_uint, we have to convert: 41 viennacl::backend::mem_handle dummy; 42 43 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 44 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 45 row_buffer.set(i, (a->compressedrow.i)[i]); 46 47 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 48 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 49 row_indices.set(i, (a->compressedrow.rindex)[i]); 50 51 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 52 for (PetscInt i=0; i<a->nz; ++i) 53 col_buffer.set(i, (a->j)[i]); 54 55 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 56 PetscCall(PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar))); 57 } else { 58 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 59 60 // Since PetscInt is in general different from cl_uint, we have to convert: 61 viennacl::backend::mem_handle dummy; 62 63 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 64 for (PetscInt i=0; i<=A->rmap->n; ++i) 65 row_buffer.set(i, (a->i)[i]); 66 67 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 68 for (PetscInt i=0; i<a->nz; ++i) 69 col_buffer.set(i, (a->j)[i]); 70 71 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 72 PetscCall(PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar))); 73 } 74 ViennaCLWaitForGPU(); 75 } catch(std::exception const & ex) { 76 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 77 } 78 79 // Create temporary vector for v += A*x: 80 if (viennaclstruct->tempvec) { 81 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 82 delete (ViennaCLVector*)viennaclstruct->tempvec; 83 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 84 } else { 85 viennaclstruct->tempvec->clear(); 86 } 87 } else { 88 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 89 } 90 91 A->offloadmask = PETSC_OFFLOAD_BOTH; 92 93 PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0)); 94 } 95 } 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 100 { 101 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 102 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 103 PetscInt m = A->rmap->n; 104 105 PetscFunctionBegin; 106 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 107 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 108 try { 109 PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 110 else { 111 112 PetscCheck((PetscInt)Agpu->size1() == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 113 a->nz = Agpu->nnz(); 114 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 115 A->preallocated = PETSC_TRUE; 116 if (a->singlemalloc) { 117 if (a->a) PetscCall(PetscFree3(a->a,a->j,a->i)); 118 } else { 119 if (a->i) PetscCall(PetscFree(a->i)); 120 if (a->j) PetscCall(PetscFree(a->j)); 121 if (a->a) PetscCall(PetscFree(a->a)); 122 } 123 PetscCall(PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i)); 124 PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt))); 125 126 a->singlemalloc = PETSC_TRUE; 127 128 /* Setup row lengths */ 129 PetscCall(PetscFree(a->imax)); 130 PetscCall(PetscFree(a->ilen)); 131 PetscCall(PetscMalloc1(m,&a->imax)); 132 PetscCall(PetscMalloc1(m,&a->ilen)); 133 PetscCall(PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt))); 134 135 /* Copy data back from GPU */ 136 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 137 138 // copy row array 139 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 140 (a->i)[0] = row_buffer[0]; 141 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 142 (a->i)[i+1] = row_buffer[i+1]; 143 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 144 } 145 146 // copy column indices 147 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 148 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 149 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 150 (a->j)[i] = col_buffer[i]; 151 152 // copy nonzero entries directly to destination (no conversion required) 153 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 154 155 PetscCall(PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)))); 156 ViennaCLWaitForGPU(); 157 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 158 } 159 } catch(std::exception const & ex) { 160 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 161 } 162 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 163 PetscFunctionReturn(0); 164 } else { 165 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 166 167 PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 168 if (!Agpu) { 169 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a); 170 } else { 171 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 172 } 173 } 174 A->offloadmask = PETSC_OFFLOAD_BOTH; 175 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 176 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 177 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 178 PetscFunctionReturn(0); 179 } 180 181 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 182 { 183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 184 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 185 const ViennaCLVector *xgpu=NULL; 186 ViennaCLVector *ygpu=NULL; 187 188 PetscFunctionBegin; 189 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 190 PetscCall(MatViennaCLCopyToGPU(A)); 191 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 192 PetscCall(VecViennaCLGetArrayRead(xx,&xgpu)); 193 PetscCall(VecViennaCLGetArrayWrite(yy,&ygpu)); 194 PetscCall(PetscLogGpuTimeBegin()); 195 try { 196 if (a->compressedrow.use) { 197 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 198 } else { 199 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 200 } 201 ViennaCLWaitForGPU(); 202 } catch (std::exception const & ex) { 203 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 204 } 205 PetscCall(PetscLogGpuTimeEnd()); 206 PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu)); 207 PetscCall(VecViennaCLRestoreArrayWrite(yy,&ygpu)); 208 PetscCall(PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt)); 209 } else { 210 PetscCall(VecSet_SeqViennaCL(yy,0)); 211 } 212 PetscFunctionReturn(0); 213 } 214 215 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 216 { 217 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 218 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 219 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 220 ViennaCLVector *zgpu=NULL; 221 222 PetscFunctionBegin; 223 /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 224 PetscCall(MatViennaCLCopyToGPU(A)); 225 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 226 try { 227 PetscCall(VecViennaCLGetArrayRead(xx,&xgpu)); 228 PetscCall(VecViennaCLGetArrayRead(yy,&ygpu)); 229 PetscCall(VecViennaCLGetArrayWrite(zz,&zgpu)); 230 PetscCall(PetscLogGpuTimeBegin()); 231 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 232 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 233 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 234 else *zgpu += *viennaclstruct->tempvec; 235 ViennaCLWaitForGPU(); 236 PetscCall(PetscLogGpuTimeEnd()); 237 PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu)); 238 PetscCall(VecViennaCLRestoreArrayRead(yy,&ygpu)); 239 PetscCall(VecViennaCLRestoreArrayWrite(zz,&zgpu)); 240 241 } catch(std::exception const & ex) { 242 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 243 } 244 PetscCall(PetscLogGpuFlops(2.0*a->nz)); 245 } else { 246 PetscCall(VecCopy_SeqViennaCL(yy,zz)); 247 } 248 PetscFunctionReturn(0); 249 } 250 251 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 252 { 253 PetscFunctionBegin; 254 PetscCall(MatAssemblyEnd_SeqAIJ(A,mode)); 255 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 256 if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 257 PetscFunctionReturn(0); 258 } 259 260 /* --------------------------------------------------------------------------------*/ 261 /*@C 262 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 263 (the default parallel PETSc format). This matrix will ultimately be pushed down 264 to GPUs and use the ViennaCL library for calculations. For good matrix 265 assembly performance the user should preallocate the matrix storage by setting 266 the parameter nz (or the array nnz). By setting these parameters accurately, 267 performance during matrix assembly can be increased substantially. 268 269 Collective 270 271 Input Parameters: 272 + comm - MPI communicator, set to PETSC_COMM_SELF 273 . m - number of rows 274 . n - number of columns 275 . nz - number of nonzeros per row (same for all rows) 276 - nnz - array containing the number of nonzeros in the various rows 277 (possibly different for each row) or NULL 278 279 Output Parameter: 280 . A - the matrix 281 282 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 283 MatXXXXSetPreallocation() paradigm instead of this routine directly. 284 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 285 286 Notes: 287 If nnz is given then nz is ignored 288 289 The AIJ format (also called the Yale sparse matrix format or 290 compressed row storage), is fully compatible with standard Fortran 77 291 storage. That is, the stored row and column indices can begin at 292 either one (as in Fortran) or zero. See the users' manual for details. 293 294 Specify the preallocated storage with either nz or nnz (not both). 295 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 296 allocation. For large problems you MUST preallocate memory or you 297 will get TERRIBLE performance, see the users' manual chapter on matrices. 298 299 Level: intermediate 300 301 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 302 303 @*/ 304 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 305 { 306 PetscFunctionBegin; 307 PetscCall(MatCreate(comm,A)); 308 PetscCall(MatSetSizes(*A,m,n,m,n)); 309 PetscCall(MatSetType(*A,MATSEQAIJVIENNACL)); 310 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz)); 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 315 { 316 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 317 318 PetscFunctionBegin; 319 try { 320 if (viennaclcontainer) { 321 delete viennaclcontainer->tempvec; 322 delete viennaclcontainer->mat; 323 delete viennaclcontainer->compressed_mat; 324 delete viennaclcontainer; 325 } 326 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 327 } catch(std::exception const & ex) { 328 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 329 } 330 331 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL)); 332 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL)); 333 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL)); 334 335 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 336 A->spptr = 0; 337 PetscCall(MatDestroy_SeqAIJ(A)); 338 PetscFunctionReturn(0); 339 } 340 341 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 342 { 343 PetscFunctionBegin; 344 PetscCall(MatCreate_SeqAIJ(B)); 345 PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B)); 346 PetscFunctionReturn(0); 347 } 348 349 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool); 350 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 351 { 352 Mat C; 353 354 PetscFunctionBegin; 355 PetscCall(MatDuplicate_SeqAIJ(A,cpvalues,B)); 356 C = *B; 357 358 PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE)); 359 C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 360 361 C->spptr = new Mat_SeqAIJViennaCL(); 362 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 363 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 364 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 365 366 PetscCall(PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL)); 367 368 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 369 370 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 371 if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 372 PetscFunctionReturn(0); 373 } 374 375 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 376 { 377 PetscFunctionBegin; 378 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 379 *array = ((Mat_SeqAIJ*)A->data)->a; 380 PetscFunctionReturn(0); 381 } 382 383 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 384 { 385 PetscFunctionBegin; 386 A->offloadmask = PETSC_OFFLOAD_CPU; 387 *array = NULL; 388 PetscFunctionReturn(0); 389 } 390 391 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 392 { 393 PetscFunctionBegin; 394 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 395 *array = ((Mat_SeqAIJ*)A->data)->a; 396 PetscFunctionReturn(0); 397 } 398 399 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 400 { 401 PetscFunctionBegin; 402 *array = NULL; 403 /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 404 PetscFunctionReturn(0); 405 } 406 407 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 408 { 409 PetscFunctionBegin; 410 *array = ((Mat_SeqAIJ*)A->data)->a; 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 415 { 416 PetscFunctionBegin; 417 A->offloadmask = PETSC_OFFLOAD_CPU; 418 *array = NULL; 419 PetscFunctionReturn(0); 420 } 421 422 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 423 { 424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 425 426 PetscFunctionBegin; 427 A->boundtocpu = flg; 428 if (flg && a->inode.size) { 429 a->inode.use = PETSC_TRUE; 430 } else { 431 a->inode.use = PETSC_FALSE; 432 } 433 if (flg) { 434 /* make sure we have an up-to-date copy on the CPU */ 435 PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL)); 436 A->ops->mult = MatMult_SeqAIJ; 437 A->ops->multadd = MatMultAdd_SeqAIJ; 438 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 439 A->ops->duplicate = MatDuplicate_SeqAIJ; 440 PetscCall(PetscMemzero(a->ops,sizeof(Mat_SeqAIJOps))); 441 } else { 442 A->ops->mult = MatMult_SeqAIJViennaCL; 443 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 444 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 445 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 446 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 447 448 a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 449 a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 450 a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 451 a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 452 a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 453 a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 454 } 455 PetscFunctionReturn(0); 456 } 457 458 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 459 { 460 Mat B; 461 462 PetscFunctionBegin; 463 PetscCheck(reuse != MAT_REUSE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 464 465 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,newmat)); 466 467 B = *newmat; 468 469 B->spptr = new Mat_SeqAIJViennaCL(); 470 471 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 472 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 473 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 474 475 PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE)); 476 A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 477 478 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL)); 479 PetscCall(PetscFree(B->defaultvectype)); 480 PetscCall(PetscStrallocpy(VECVIENNACL,&B->defaultvectype)); 481 482 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL)); 483 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense)); 484 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ)); 485 486 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 487 488 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 489 if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 490 PetscFunctionReturn(0); 491 } 492 493 /*MC 494 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 495 496 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 497 default. All matrix calculations are performed using the ViennaCL library. 498 499 Options Database Keys: 500 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 501 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 502 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 503 504 Level: beginner 505 506 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 507 M*/ 508 509 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 510 { 511 PetscFunctionBegin; 512 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc)); 513 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc)); 514 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc)); 515 PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc)); 516 PetscFunctionReturn(0); 517 } 518