/*
   Defines the basic matrix operations for the AIJ (compressed row)
   matrix storage format.
*/

#include <petscconf.h>
#define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
#include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
#include <petscbt.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/vecimpl.h>

#include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>

#include <algorithm>
#include <vector>
#include <string>

#include "viennacl/linalg/prod.hpp"

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);

/*
   MatViennaCLCopyToGPU - Pushes the host-side CSR data of A onto the GPU if the
   device copy is missing or stale (offloadmask UNALLOCATED or CPU).

   Two device representations are maintained in A->spptr:
   - compressed_mat: used when the host matrix stores compressed (nonzero-rows-only) CSR
   - mat:            ordinary CSR otherwise

   Since PetscInt generally differs from the cl_uint index type ViennaCL expects,
   the row/column index arrays are converted element-by-element through
   viennacl::backend::typesafe_host_array before upload.  A device work vector
   (tempvec) of length rmap->n is also (re)allocated here; it is the scratch
   vector used by MatMultAdd_SeqAIJViennaCL below.

   On success the offload mask is set to PETSC_OFFLOAD_BOTH (host and device in sync).
*/
PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);

      try {
        if (a->compressedrow.use) {
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          // Row pointer array of the compressed CSR (nrows+1 entries)
          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
          for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
            row_buffer.set(i, (a->compressedrow.i)[i]);

          // Global row indices of the nonzero rows
          viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
            row_indices.set(i, (a->compressedrow.rindex)[i]);

          // Column indices for all nonzeros
          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        } else {
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          // Row pointer array of the full CSR (rmap->n+1 entries)
          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
          for (PetscInt i=0; i<=A->rmap->n; ++i)
            row_buffer.set(i, (a->i)[i]);

          // Column indices for all nonzeros
          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        }
        ViennaCLWaitForGPU(); // presumably blocks until the uploads complete -- TODO confirm against ViennaCL docs
      } catch(std::exception const & ex) {
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          // Row count changed: discard and reallocate at the new size
          delete (ViennaCLVector*)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatViennaCLCopyFromGPU - Copies matrix data from the GPU back to the host.

   Behavior depends on the offload state and whether a source GPU matrix Agpu
   is supplied:
   - PETSC_OFFLOAD_BOTH: host already up to date; no-op.
   - PETSC_OFFLOAD_UNALLOCATED with Agpu: the host matrix is (re)allocated and
     the full structure (row pointers, column indices, values) is downloaded;
     imax/ilen row-length arrays are rebuilt from the row pointers.
   - otherwise: only the numerical values are downloaded (from Agpu if given,
     else from the internal device matrix), since the sparsity pattern on the
     host is assumed current.  If no Agpu is given and the device copy is not
     newer than the host (offloadmask != GPU), this is a no-op.

   Row-compressed device matrices are not supported for download.
   Ends with a (re)assembly so the matrix is in a consistent assembled state;
   the offload mask is set to PETSC_OFFLOAD_BOTH first so the assembly does not
   trigger a redundant re-upload.
*/
PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
  PetscInt           m               = A->rmap->n;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      else {

        if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
        a->nz           = Agpu->nnz();
        a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
        A->preallocated = PETSC_TRUE;
        /* Free any previous host storage, respecting how it was allocated */
        if (a->singlemalloc) {
          if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
        } else {
          if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
          if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
          if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
        }
        ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);

        a->singlemalloc = PETSC_TRUE;

        /* Setup row lengths */
        ierr = PetscFree(a->imax);CHKERRQ(ierr);
        ierr = PetscFree(a->ilen);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);

        /* Copy data back from GPU */
        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

        // copy row array
        viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
        (a->i)[0] = row_buffer[0];
        for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
          (a->i)[i+1] = row_buffer[i+1];
          a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
        }

        // copy column indices
        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
        viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
        for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
          (a->j)[i] = col_buffer[i];

        // copy nonzero entries directly to destination (no conversion required)
        viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);

        ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
        ViennaCLWaitForGPU();
        /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
      }
    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    /* Nothing allocated on either side and no source matrix: nothing to do */
    PetscFunctionReturn(0);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);

    if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    /* Same sparsity pattern on host and device: download only the values */
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatMult_SeqAIJViennaCL - Computes yy = A*xx on the GPU using
   viennacl::linalg::prod().  For an empty matrix (no rows, no columns, or no
   stored nonzeros) it simply zeros yy, avoiding zero-sized device buffers.
   Flop count 2*nz - nonzerorowcnt: one multiply per nonzero plus one add per
   additional nonzero in each nonempty row.
*/
PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ*)A->data;
  PetscErrorCode       ierr;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  const ViennaCLVector *xgpu=NULL;
  ViennaCLVector       *ygpu=NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
    ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
    ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
    try {
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
    }
    ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
    ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
    ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
    ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  } else {
    ierr = VecSet_SeqViennaCL(yy,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatMultAdd_SeqAIJViennaCL - Computes zz = yy + A*xx on the GPU.  The product
   A*xx is formed in the scratch vector tempvec (allocated in
   MatViennaCLCopyToGPU) and then added to yy; the zz == yy aliasing case is
   handled with an in-place +=.  For an empty matrix, zz is just a copy of yy.
*/
PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ*)A->data;
  PetscErrorCode       ierr;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
  ViennaCLVector       *zgpu=NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
      ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);

    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
    }
    ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
  } else {
    ierr = VecCopy_SeqViennaCL(yy,zz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_SeqAIJViennaCL - Runs the ordinary SeqAIJ assembly, then
   pushes the assembled matrix to the GPU (unless bound to the CPU).  Flush
   assemblies are not mirrored to the device.
*/
PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
  if (!A->boundtocpu) {
    ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* --------------------------------------------------------------------------------*/
/*@C
   MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
   (the default parallel PETSc format).  This matrix will ultimately be pushed down
   to GPUs and use the ViennaCL library for calculations.  For good matrix
   assembly performance the user should preallocate the matrix storage by setting
   the parameter nz (or the array nnz).  By setting these parameters accurately,
   performance during matrix assembly can be increased substantially.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If nnz is given then nz is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users' manual for details.

   Specify the preallocated storage with either nz or nnz (not both).
   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
   allocation.  For large problems you MUST preallocate memory or you
   will get TERRIBLE performance, see the users' manual chapter on matrices.

   Level: intermediate

.seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()

@*/
PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDestroy_SeqAIJViennaCL - Releases the device-side objects (matrix copies
   and scratch vector), removes the composed conversion/product functions, and
   then defers to the ordinary SeqAIJ destructor for the host data.
*/
PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
{
  PetscErrorCode     ierr;
  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;

  PetscFunctionBegin;
  try {
    if (viennaclcontainer) {
      delete viennaclcontainer->tempvec;
      delete viennaclcontainer->mat;
      delete viennaclcontainer->compressed_mat;
      delete viennaclcontainer;
    }
    A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  } catch(std::exception const & ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
  }

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL);CHKERRQ(ierr);

  /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
  A->spptr = 0;
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreate_SeqAIJViennaCL - Type constructor: builds an ordinary SeqAIJ
   matrix and converts it in place to the ViennaCL type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
/*
   MatDuplicate_SeqAIJViennaCL - Duplicates via the SeqAIJ host path, then
   equips the copy with a fresh (empty) ViennaCL container and the ViennaCL
   operations; the device copy is populated lazily (immediately if the source
   was already assembled).
*/
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
  C    = *B;

  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  C->spptr = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;

  ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) {
    ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

/*
   MatSeqAIJGetArray_SeqAIJViennaCL - Returns the host value array after pulling
   any newer values down from the GPU.  The offload mask is set to CPU since the
   caller may modify the returned array.
*/
static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
  *array = a->a;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Companion to MatSeqAIJGetArray_SeqAIJViennaCL: just invalidates the caller's pointer. */
static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = NULL;
  PetscFunctionReturn(0);
}

/*
   MatBindToCPU_SeqAIJViennaCL - Switches the matrix between CPU-only and GPU
   execution.  Binding to the CPU syncs the host copy, re-enables inodes (if
   present), and installs the plain SeqAIJ operations; unbinding installs the
   ViennaCL operations.  Note the destroy op is only set on the GPU branch.
*/
static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
{
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  A->boundtocpu = flg;
  if (flg && aij->inode.size) {
    aij->inode.use = PETSC_TRUE;
  } else {
    aij->inode.use = PETSC_FALSE;
  }
  if (flg) {
    /* make sure we have an up-to-date copy on the CPU */
    ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);

    A->ops->mult        = MatMult_SeqAIJ;
    A->ops->multadd     = MatMultAdd_SeqAIJ;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
    A->ops->duplicate   = MatDuplicate_SeqAIJ;
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);

    A->ops->mult        = MatMult_SeqAIJViennaCL;
    A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
    A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
    A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
  }
  PetscFunctionReturn(0);
}

/*
   MatConvert_SeqAIJ_SeqAIJViennaCL - Converts a SeqAIJ matrix to the ViennaCL
   type.  Only MAT_INPLACE_MATRIX and MAT_INITIAL_MATRIX are supported;
   MAT_REUSE_MATRIX is rejected.

   NOTE(review): several calls below (MatBindToCPU, ops->bindtocpu, the
   PetscObjectComposeFunction calls) operate on A while others operate on B.
   For MAT_INPLACE_MATRIX A and B refer to the same matrix so this is
   equivalent; for MAT_INITIAL_MATRIX they differ -- this mirrors the upstream
   code, but verify the intended target before changing it.
*/
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;

  if (reuse == MAT_REUSE_MATRIX) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  }

  B = *newmat;

  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;

  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ);CHKERRQ(ierr);

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) {
    ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

/*MC
   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.

   A matrix type whose data resides on GPUs.  These matrices are in CSR format by
   default.  All matrix calculations are performed using the ViennaCL library.

   Options Database Keys:
+  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
.  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
-  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().

  Level: beginner

.seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
M*/

/*
   MatSolverTypeRegister_ViennaCL - Registers the built-in PETSc (CPU) LU,
   Cholesky, ILU, and ICC factorizations as the solvers for the ViennaCL
   matrix type; factorization itself is not offloaded to the GPU.
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}