1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 10 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 11 #include <petscbt.h> 12 #include <../src/vec/vec/impls/dvecimpl.h> 13 #include <petsc/private/vecimpl.h> 14 15 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 16 17 18 #include <algorithm> 19 #include <vector> 20 #include <string> 21 22 #include "viennacl/linalg/prod.hpp" 23 24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 25 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 26 27 28 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 29 { 30 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32 PetscErrorCode ierr; 33 34 PetscFunctionBegin; 35 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 37 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38 39 try { 40 if (a->compressedrow.use) { 41 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42 43 // Since PetscInt is different from cl_uint, we have to convert: 44 viennacl::backend::mem_handle dummy; 45 46 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48 row_buffer.set(i, (a->compressedrow.i)[i]); 49 50 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52 row_indices.set(i, (a->compressedrow.rindex)[i]); 53 54 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55 for (PetscInt i=0; i<a->nz; ++i) 56 col_buffer.set(i, (a->j)[i]); 57 58 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59 ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 60 } else { 61 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62 63 // Since PetscInt is in general different from cl_uint, we have to convert: 64 viennacl::backend::mem_handle dummy; 65 66 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67 for (PetscInt i=0; i<=A->rmap->n; ++i) 68 row_buffer.set(i, (a->i)[i]); 69 70 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71 for (PetscInt i=0; i<a->nz; ++i) 72 col_buffer.set(i, (a->j)[i]); 73 74 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75 ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 76 } 77 ViennaCLWaitForGPU(); 78 } catch(std::exception const & ex) { 79 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 80 } 81 82 // Create temporary vector for v += A*x: 83 if (viennaclstruct->tempvec) { 84 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 85 delete (ViennaCLVector*)viennaclstruct->tempvec; 86 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 87 } else { 88 viennaclstruct->tempvec->clear(); 89 } 90 } else { 91 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 92 } 93 94 A->offloadmask = PETSC_OFFLOAD_BOTH; 95 96 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 97 } 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103 { 104 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 106 PetscInt m = A->rmap->n; 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 111 if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 112 try { 113 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 114 else { 115 116 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 117 a->nz = Agpu->nnz(); 118 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 119 A->preallocated = PETSC_TRUE; 120 if (a->singlemalloc) { 121 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 122 } else { 123 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 124 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 125 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 126 } 127 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 128 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 129 130 a->singlemalloc = PETSC_TRUE; 131 132 /* Setup row lengths */ 133 ierr = PetscFree(a->imax);CHKERRQ(ierr); 134 ierr = PetscFree(a->ilen);CHKERRQ(ierr); 135 ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr); 136 ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr); 137 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 138 139 /* Copy data back from GPU */ 140 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 141 142 // copy row array 143 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 144 (a->i)[0] = row_buffer[0]; 145 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 146 (a->i)[i+1] = row_buffer[i+1]; 147 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 148 } 149 150 // copy column indices 151 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 152 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 153 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 154 (a->j)[i] = col_buffer[i]; 155 156 // copy nonzero entries directly to destination (no conversion required) 157 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 158 159 ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 160 ViennaCLWaitForGPU(); 161 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 162 } 163 } catch(std::exception const & ex) { 164 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 165 } 166 } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 167 PetscFunctionReturn(0); 168 } else { 169 if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 170 171 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 172 if (!Agpu) { 173 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a); 174 } else { 175 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 176 } 177 } 178 A->offloadmask = PETSC_OFFLOAD_BOTH; 179 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 180 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 181 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 186 { 187 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 188 PetscErrorCode ierr; 189 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 190 const ViennaCLVector *xgpu=NULL; 191 ViennaCLVector *ygpu=NULL; 192 193 PetscFunctionBegin; 194 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 195 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 196 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 197 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 198 try { 199 if (a->compressedrow.use) { 200 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 201 } else { 202 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 203 } 204 ViennaCLWaitForGPU(); 205 } catch (std::exception const & ex) { 206 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 207 } 208 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 209 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 210 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 211 ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 212 } else { 213 ierr = VecSet(yy,0);CHKERRQ(ierr); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 219 { 220 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 221 PetscErrorCode ierr; 222 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 223 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 224 ViennaCLVector *zgpu=NULL; 225 226 PetscFunctionBegin; 227 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 228 try { 229 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 230 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 231 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 232 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 233 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 234 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 235 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 236 else *zgpu += *viennaclstruct->tempvec; 237 ViennaCLWaitForGPU(); 238 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 239 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 240 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 241 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 242 243 } catch(std::exception const & ex) { 244 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 245 } 246 ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr); 247 } else { 248 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 249 } 250 PetscFunctionReturn(0); 251 } 252 253 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 254 { 255 PetscErrorCode ierr; 256 257 PetscFunctionBegin; 258 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 259 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 260 if (!A->pinnedtocpu) { 261 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 262 } 263 PetscFunctionReturn(0); 264 } 265 266 /* --------------------------------------------------------------------------------*/ 267 /*@ 268 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 269 (the default parallel PETSc format). This matrix will ultimately be pushed down 270 to GPUs and use the ViennaCL library for calculations. For good matrix 271 assembly performance the user should preallocate the matrix storage by setting 272 the parameter nz (or the array nnz). By setting these parameters accurately, 273 performance during matrix assembly can be increased substantially. 274 275 276 Collective 277 278 Input Parameters: 279 + comm - MPI communicator, set to PETSC_COMM_SELF 280 . m - number of rows 281 . n - number of columns 282 . nz - number of nonzeros per row (same for all rows) 283 - nnz - array containing the number of nonzeros in the various rows 284 (possibly different for each row) or NULL 285 286 Output Parameter: 287 . A - the matrix 288 289 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 290 MatXXXXSetPreallocation() paradigm instead of this routine directly. 291 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 292 293 Notes: 294 If nnz is given then nz is ignored 295 296 The AIJ format (also called the Yale sparse matrix format or 297 compressed row storage), is fully compatible with standard Fortran 77 298 storage. That is, the stored row and column indices can begin at 299 either one (as in Fortran) or zero. See the users' manual for details. 300 301 Specify the preallocated storage with either nz or nnz (not both). 302 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 303 allocation. For large problems you MUST preallocate memory or you 304 will get TERRIBLE performance, see the users' manual chapter on matrices. 305 306 Level: intermediate 307 308 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 309 310 @*/ 311 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 312 { 313 PetscErrorCode ierr; 314 315 PetscFunctionBegin; 316 ierr = MatCreate(comm,A);CHKERRQ(ierr); 317 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 318 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 319 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 320 PetscFunctionReturn(0); 321 } 322 323 324 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 325 { 326 PetscErrorCode ierr; 327 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 328 329 PetscFunctionBegin; 330 try { 331 if (viennaclcontainer) { 332 delete viennaclcontainer->tempvec; 333 delete viennaclcontainer->mat; 334 delete viennaclcontainer->compressed_mat; 335 delete viennaclcontainer; 336 } 337 A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 338 } catch(std::exception const & ex) { 339 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 340 } 341 342 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 343 344 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 345 A->spptr = 0; 346 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 351 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 352 { 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 357 ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 358 PetscFunctionReturn(0); 359 } 360 361 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 362 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 363 { 364 PetscErrorCode ierr; 365 Mat C; 366 367 PetscFunctionBegin; 368 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 369 C = *B; 370 371 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 372 C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 373 374 C->spptr = new Mat_SeqAIJViennaCL(); 375 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 376 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 377 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 378 379 ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 380 381 C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 382 383 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 384 if (C->assembled) { 385 ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 386 } 387 388 PetscFunctionReturn(0); 389 } 390 391 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 392 { 393 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 394 PetscErrorCode ierr; 395 396 PetscFunctionBegin; 397 ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 398 *array = a->a; 399 A->offloadmask = PETSC_OFFLOAD_CPU; 400 PetscFunctionReturn(0); 401 } 402 403 404 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 405 { 406 PetscFunctionBegin; 407 *array = NULL; 408 PetscFunctionReturn(0); 409 } 410 411 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 412 { 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 A->pinnedtocpu = flg; 417 if (flg) { 418 /* make sure we have an up-to-date copy on the CPU */ 419 ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr); 421 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr); 422 423 A->ops->mult = MatMult_SeqAIJ; 424 A->ops->multadd = MatMultAdd_SeqAIJ; 425 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 426 A->ops->duplicate = MatDuplicate_SeqAIJ; 427 } else { 428 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr); 429 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr); 430 431 A->ops->mult = MatMult_SeqAIJViennaCL; 432 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 433 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 434 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 435 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 436 } 437 PetscFunctionReturn(0); 438 } 439 440 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 441 { 442 PetscErrorCode ierr; 443 Mat B; 444 Mat_SeqAIJ *aij; 445 446 PetscFunctionBegin; 447 448 if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 449 450 if (reuse == MAT_INITIAL_MATRIX) { 451 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 452 } 453 454 B = *newmat; 455 456 aij = (Mat_SeqAIJ*)B->data; 457 aij->inode.use = PETSC_FALSE; 458 459 B->spptr = new Mat_SeqAIJViennaCL(); 460 461 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 462 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 463 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 464 465 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 466 A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 467 468 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 469 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 470 ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 471 472 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 473 474 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 475 476 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 477 if (B->assembled) { 478 ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 479 } 480 481 PetscFunctionReturn(0); 482 } 483 484 485 /*MC 486 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 487 488 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 489 default. All matrix calculations are performed using the ViennaCL library. 490 491 Options Database Keys: 492 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 493 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 494 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 495 496 Level: beginner 497 498 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 499 M*/ 500 501 502 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 503 { 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 508 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 509 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 510 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513