1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc/private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 17 #include <algorithm> 18 #include <vector> 19 #include <string> 20 21 #include "viennacl/linalg/prod.hpp" 22 23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 25 26 27 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28 { 29 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 30 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 35 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 36 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 37 38 try { 39 if (a->compressedrow.use) { 40 if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 41 42 // Since PetscInt is different from cl_uint, we have to convert: 43 viennacl::backend::mem_handle dummy; 44 45 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 46 for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 47 row_buffer.set(i, (a->compressedrow.i)[i]); 48 49 viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 50 for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 51 row_indices.set(i, (a->compressedrow.rindex)[i]); 52 53 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 54 for (PetscInt i=0; i<a->nz; ++i) 55 col_buffer.set(i, (a->j)[i]); 56 57 viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 58 ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 59 } else { 60 if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 61 62 // Since PetscInt is in general different from cl_uint, we have to convert: 63 viennacl::backend::mem_handle dummy; 64 65 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 66 for (PetscInt i=0; i<=A->rmap->n; ++i) 67 row_buffer.set(i, (a->i)[i]); 68 69 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 70 for (PetscInt i=0; i<a->nz; ++i) 71 col_buffer.set(i, (a->j)[i]); 72 73 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 74 ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 75 } 76 ViennaCLWaitForGPU(); 77 } catch(std::exception const & ex) { 78 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 79 } 80 81 // Create temporary vector for v += A*x: 82 if (viennaclstruct->tempvec) { 83 if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 84 delete (ViennaCLVector*)viennaclstruct->tempvec; 85 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86 } else { 87 viennaclstruct->tempvec->clear(); 88 } 89 } else { 90 viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 91 } 92 93 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 94 95 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 96 } 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 102 { 103 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105 PetscInt m = A->rmap->n; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 if (A->valid_GPU_matrix == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 110 if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 111 try { 112 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 113 else { 114 115 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 116 a->nz = Agpu->nnz(); 117 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 118 A->preallocated = PETSC_TRUE; 119 if (a->singlemalloc) { 120 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 121 } else { 122 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 123 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 124 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 125 } 126 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 127 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 a->singlemalloc = PETSC_TRUE; 130 131 /* Setup row lengths */ 132 ierr = PetscFree(a->imax);CHKERRQ(ierr); 133 ierr = PetscFree(a->ilen);CHKERRQ(ierr); 134 ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr); 135 ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr); 136 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 137 138 /* Copy data back from GPU */ 139 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 140 141 // copy row array 142 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 143 (a->i)[0] = row_buffer[0]; 144 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 145 (a->i)[i+1] = row_buffer[i+1]; 146 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 147 } 148 149 // copy column indices 150 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 151 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 152 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 153 (a->j)[i] = col_buffer[i]; 154 155 // copy nonzero entries directly to destination (no conversion required) 156 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 157 158 ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 159 ViennaCLWaitForGPU(); 160 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 161 } 162 } catch(std::exception const & ex) { 163 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 164 } 165 } else if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 166 PetscFunctionReturn(0); 167 } else { 168 if (!Agpu && A->valid_GPU_matrix != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 169 170 if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 171 if (!Agpu) { 172 viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a); 173 } else { 174 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 175 } 176 } 177 A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 178 /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 179 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 185 { 186 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 187 PetscErrorCode ierr; 188 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 189 const ViennaCLVector *xgpu=NULL; 190 ViennaCLVector *ygpu=NULL; 191 192 PetscFunctionBegin; 193 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 194 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 195 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 196 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 197 try { 198 if (a->compressedrow.use) { 199 *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 200 } else { 201 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 202 } 203 ViennaCLWaitForGPU(); 204 } catch (std::exception const & ex) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 206 } 207 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 208 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 209 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 210 ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 211 } else { 212 ierr = VecSet(yy,0);CHKERRQ(ierr); 213 } 214 PetscFunctionReturn(0); 215 } 216 217 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 218 { 219 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 220 PetscErrorCode ierr; 221 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 222 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 223 ViennaCLVector *zgpu=NULL; 224 225 PetscFunctionBegin; 226 if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 227 try { 228 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 229 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 230 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 231 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 232 if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 233 else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 234 if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 235 else *zgpu += *viennaclstruct->tempvec; 236 ViennaCLWaitForGPU(); 237 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 238 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 239 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 240 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 241 242 } catch(std::exception const & ex) { 243 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 244 } 245 ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr); 246 } else { 247 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 248 } 249 PetscFunctionReturn(0); 250 } 251 252 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 253 { 254 PetscErrorCode ierr; 255 256 PetscFunctionBegin; 257 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 258 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 259 if (!A->pinnedtocpu) { 260 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 261 } 262 PetscFunctionReturn(0); 263 } 264 265 /* --------------------------------------------------------------------------------*/ 266 /*@ 267 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 268 (the default parallel PETSc format). This matrix will ultimately be pushed down 269 to GPUs and use the ViennaCL library for calculations. For good matrix 270 assembly performance the user should preallocate the matrix storage by setting 271 the parameter nz (or the array nnz). By setting these parameters accurately, 272 performance during matrix assembly can be increased substantially. 273 274 275 Collective 276 277 Input Parameters: 278 + comm - MPI communicator, set to PETSC_COMM_SELF 279 . m - number of rows 280 . n - number of columns 281 . nz - number of nonzeros per row (same for all rows) 282 - nnz - array containing the number of nonzeros in the various rows 283 (possibly different for each row) or NULL 284 285 Output Parameter: 286 . A - the matrix 287 288 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 289 MatXXXXSetPreallocation() paradigm instead of this routine directly. 290 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 291 292 Notes: 293 If nnz is given then nz is ignored 294 295 The AIJ format (also called the Yale sparse matrix format or 296 compressed row storage), is fully compatible with standard Fortran 77 297 storage. That is, the stored row and column indices can begin at 298 either one (as in Fortran) or zero. See the users' manual for details. 299 300 Specify the preallocated storage with either nz or nnz (not both). 301 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 302 allocation. For large problems you MUST preallocate memory or you 303 will get TERRIBLE performance, see the users' manual chapter on matrices. 304 305 Level: intermediate 306 307 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 308 309 @*/ 310 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 311 { 312 PetscErrorCode ierr; 313 314 PetscFunctionBegin; 315 ierr = MatCreate(comm,A);CHKERRQ(ierr); 316 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 317 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 318 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321 322 323 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 324 { 325 PetscErrorCode ierr; 326 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 327 328 PetscFunctionBegin; 329 try { 330 if (viennaclcontainer) { 331 delete viennaclcontainer->tempvec; 332 delete viennaclcontainer->mat; 333 delete viennaclcontainer->compressed_mat; 334 delete viennaclcontainer; 335 } 336 A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 337 } catch(std::exception const & ex) { 338 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 339 } 340 341 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 342 343 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 344 A->spptr = 0; 345 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 346 PetscFunctionReturn(0); 347 } 348 349 350 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 351 { 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 356 ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 357 PetscFunctionReturn(0); 358 } 359 360 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 361 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 362 { 363 PetscErrorCode ierr; 364 Mat C; 365 366 PetscFunctionBegin; 367 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 368 C = *B; 369 370 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 371 C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 372 373 C->spptr = new Mat_SeqAIJViennaCL(); 374 ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 375 ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 376 ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 377 378 ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 379 380 C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 381 382 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 383 if (C->assembled) { 384 ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 385 } 386 387 PetscFunctionReturn(0); 388 } 389 390 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 391 { 392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 397 *array = a->a; 398 A->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 399 PetscFunctionReturn(0); 400 } 401 402 403 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 404 { 405 PetscFunctionBegin; 406 *array = NULL; 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 411 { 412 PetscErrorCode ierr; 413 414 PetscFunctionBegin; 415 A->pinnedtocpu = flg; 416 if (flg) { 417 /* make sure we have an up-to-date copy on the CPU */ 418 ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 419 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr); 420 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr); 421 422 A->ops->mult = MatMult_SeqAIJ; 423 A->ops->multadd = MatMultAdd_SeqAIJ; 424 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 425 A->ops->duplicate = MatDuplicate_SeqAIJ; 426 } else { 427 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr); 428 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr); 429 430 A->ops->mult = MatMult_SeqAIJViennaCL; 431 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 432 A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 433 A->ops->destroy = MatDestroy_SeqAIJViennaCL; 434 A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 435 } 436 PetscFunctionReturn(0); 437 } 438 439 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 440 { 441 PetscErrorCode ierr; 442 Mat B; 443 Mat_SeqAIJ *aij; 444 445 PetscFunctionBegin; 446 447 if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 448 449 if (reuse == MAT_INITIAL_MATRIX) { 450 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 451 } 452 453 B = *newmat; 454 455 aij = (Mat_SeqAIJ*)B->data; 456 aij->inode.use = PETSC_FALSE; 457 458 B->spptr = new Mat_SeqAIJViennaCL(); 459 460 ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 461 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 462 ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 463 464 ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 465 A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 466 467 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 468 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 469 ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 470 471 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 472 473 B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 474 475 /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 476 if (B->assembled) { 477 ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 478 } 479 480 PetscFunctionReturn(0); 481 } 482 483 484 /*MC 485 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 486 487 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 488 default. All matrix calculations are performed using the ViennaCL library. 489 490 Options Database Keys: 491 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 492 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 493 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 494 495 Level: beginner 496 497 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 498 M*/ 499 500 501 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 502 { 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 507 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 508 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 509 ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512