1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp /* 3e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 4e4a0ef16SKarl Rupp matrix storage format. 5e4a0ef16SKarl Rupp */ 6e4a0ef16SKarl Rupp 7aaa7dc30SBarry Smith #include <petscconf.h> 899acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp #include <algorithm> 17e4a0ef16SKarl Rupp #include <vector> 18e4a0ef16SKarl Rupp #include <string> 19e4a0ef16SKarl Rupp 20e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 21e4a0ef16SKarl Rupp 228713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2372367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 258713a8baSPatrick Sanan 26e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 27e4a0ef16SKarl Rupp { 28e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30e4a0ef16SKarl Rupp PetscErrorCode ierr; 31e4a0ef16SKarl Rupp 32e4a0ef16SKarl Rupp PetscFunctionBegin; 33bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 34c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 35e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 36e4a0ef16SKarl Rupp 37e4a0ef16SKarl Rupp try { 38e4a0ef16SKarl Rupp if (a->compressedrow.use) { 39a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 40e4a0ef16SKarl Rupp 41a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 42a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 43e4a0ef16SKarl Rupp 44a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 45a3430c56SKarl Rupp for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 46a3430c56SKarl Rupp row_buffer.set(i, (a->compressedrow.i)[i]); 47e4a0ef16SKarl Rupp 48a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 49a3430c56SKarl Rupp for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 50a3430c56SKarl Rupp row_indices.set(i, (a->compressedrow.rindex)[i]); 51a3430c56SKarl Rupp 52a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 53a3430c56SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 54a3430c56SKarl Rupp col_buffer.set(i, (a->j)[i]); 55a3430c56SKarl Rupp 56a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 574863603aSSatish Balay ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 58e4a0ef16SKarl Rupp } else { 59a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 60e4a0ef16SKarl Rupp 61e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 62e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 63e4a0ef16SKarl Rupp 64e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 65e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 66e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 67e4a0ef16SKarl Rupp 68e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 69e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 70e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 71e4a0ef16SKarl Rupp 72e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 734863603aSSatish Balay ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 74e4a0ef16SKarl Rupp } 754cf1874eSKarl Rupp ViennaCLWaitForGPU(); 764076e183SKarl Rupp } catch(std::exception const & ex) { 774076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 78e4a0ef16SKarl Rupp } 79e4a0ef16SKarl Rupp 80a3430c56SKarl Rupp // Create temporary vector for v += A*x: 81a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 829b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 83a3430c56SKarl Rupp delete (ViennaCLVector*)viennaclstruct->tempvec; 849b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 85a3430c56SKarl Rupp } else { 86a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 87a3430c56SKarl Rupp } 88a3430c56SKarl Rupp } else { 899b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 90a3430c56SKarl Rupp } 91a3430c56SKarl Rupp 92c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 93e4a0ef16SKarl Rupp 94e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 95e4a0ef16SKarl Rupp } 9667c87b7fSKarl Rupp } 97e4a0ef16SKarl Rupp PetscFunctionReturn(0); 98e4a0ef16SKarl Rupp } 99e4a0ef16SKarl Rupp 1000d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 101e4a0ef16SKarl Rupp { 102f38c1e66SStefano Zampini Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 103e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 104e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 105e4a0ef16SKarl Rupp PetscErrorCode ierr; 106e4a0ef16SKarl Rupp 107e4a0ef16SKarl Rupp PetscFunctionBegin; 108c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 109c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 110e4a0ef16SKarl Rupp try { 111ea13f565SStefano Zampini if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1126c4ed002SBarry Smith else { 113e4a0ef16SKarl Rupp 114ea13f565SStefano Zampini if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 115e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 116e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 117e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 118e4a0ef16SKarl Rupp if (a->singlemalloc) { 119e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 120e4a0ef16SKarl Rupp } else { 121e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 122e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 123e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 124e4a0ef16SKarl Rupp } 125dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 126f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 127e4a0ef16SKarl Rupp 128e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 129e4a0ef16SKarl Rupp 130e4a0ef16SKarl Rupp /* Setup row lengths */ 131071fcb05SBarry Smith ierr = PetscFree(a->imax);CHKERRQ(ierr); 132071fcb05SBarry Smith ierr = PetscFree(a->ilen);CHKERRQ(ierr); 133071fcb05SBarry Smith ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr); 134071fcb05SBarry Smith ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr); 135f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 136e4a0ef16SKarl Rupp 137e4a0ef16SKarl Rupp /* Copy data back from GPU */ 138e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 139e4a0ef16SKarl Rupp 140e4a0ef16SKarl Rupp // copy row array 141e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 142e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 143e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 144e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 145e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 146e4a0ef16SKarl Rupp } 147e4a0ef16SKarl Rupp 148e4a0ef16SKarl Rupp // copy column indices 149e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 150e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 151e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 152e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 153e4a0ef16SKarl Rupp 154e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 155e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 156e4a0ef16SKarl Rupp 1574863603aSSatish Balay ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 1584cf1874eSKarl Rupp ViennaCLWaitForGPU(); 159023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 160e4a0ef16SKarl Rupp } 1614076e183SKarl Rupp } catch(std::exception const & ex) { 1624076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 163e4a0ef16SKarl Rupp } 164c70f7ee4SJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 165f38c1e66SStefano Zampini PetscFunctionReturn(0); 166f38c1e66SStefano Zampini } else { 167c70f7ee4SJunchao Zhang if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 168e4a0ef16SKarl Rupp 169ea13f565SStefano Zampini if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 170f38c1e66SStefano Zampini if (!Agpu) { 171f38c1e66SStefano Zampini viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a); 172f38c1e66SStefano Zampini } else { 173f38c1e66SStefano Zampini viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 174f38c1e66SStefano Zampini } 175f38c1e66SStefano Zampini } 176c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 177b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 178e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 180e4a0ef16SKarl Rupp PetscFunctionReturn(0); 181e4a0ef16SKarl Rupp } 182e4a0ef16SKarl Rupp 183e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 184e4a0ef16SKarl Rupp { 185e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 186e4a0ef16SKarl Rupp PetscErrorCode ierr; 187e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 1880d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 1890d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 190e4a0ef16SKarl Rupp 191e4a0ef16SKarl Rupp PetscFunctionBegin; 192837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 193837a59e1SRichard Tran Mills ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 194bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 195e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 196e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 1977a052e47Shannah_mairs ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 198e4a0ef16SKarl Rupp try { 199b7832b47SStefano Zampini if (a->compressedrow.use) { 200b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 201b7832b47SStefano Zampini } else { 202e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 203b7832b47SStefano Zampini } 2044cf1874eSKarl Rupp ViennaCLWaitForGPU(); 2054076e183SKarl Rupp } catch (std::exception const & ex) { 2064076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 207e4a0ef16SKarl Rupp } 208958c4211Shannah_mairs ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 209e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 210e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 211958c4211Shannah_mairs ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 212bf1781e8SStefano Zampini } else { 213383ef339SStefano Zampini ierr = VecSet_SeqViennaCL(yy,0);CHKERRQ(ierr); 21467c87b7fSKarl Rupp } 215e4a0ef16SKarl Rupp PetscFunctionReturn(0); 216e4a0ef16SKarl Rupp } 217e4a0ef16SKarl Rupp 218e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 219e4a0ef16SKarl Rupp { 220e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 221e4a0ef16SKarl Rupp PetscErrorCode ierr; 222e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2230d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2240d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 225e4a0ef16SKarl Rupp 226e4a0ef16SKarl Rupp PetscFunctionBegin; 227837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 228837a59e1SRichard Tran Mills ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 229bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 230e4a0ef16SKarl Rupp try { 231e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 232e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 233e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 2347a052e47Shannah_mairs ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr); 235f38c1e66SStefano Zampini if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 236f38c1e66SStefano Zampini else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 237f38c1e66SStefano Zampini if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 238f38c1e66SStefano Zampini else *zgpu += *viennaclstruct->tempvec; 2394cf1874eSKarl Rupp ViennaCLWaitForGPU(); 240958c4211Shannah_mairs ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr); 241e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 242e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 243e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 244e4a0ef16SKarl Rupp 2454076e183SKarl Rupp } catch(std::exception const & ex) { 2464076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 247e4a0ef16SKarl Rupp } 248958c4211Shannah_mairs ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr); 249bf1781e8SStefano Zampini } else { 250383ef339SStefano Zampini ierr = VecCopy_SeqViennaCL(yy,zz);CHKERRQ(ierr); 25167c87b7fSKarl Rupp } 252e4a0ef16SKarl Rupp PetscFunctionReturn(0); 253e4a0ef16SKarl Rupp } 254e4a0ef16SKarl Rupp 255e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 256e4a0ef16SKarl Rupp { 257e4a0ef16SKarl Rupp PetscErrorCode ierr; 258e4a0ef16SKarl Rupp 259e4a0ef16SKarl Rupp PetscFunctionBegin; 260e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 261489de41dSStefano Zampini if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 262b470e4b4SRichard Tran Mills if (!A->boundtocpu) { 263e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 264e7e92044SBarry Smith } 265e4a0ef16SKarl Rupp PetscFunctionReturn(0); 266e4a0ef16SKarl Rupp } 267e4a0ef16SKarl Rupp 268e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 269cab5ea25SPierre Jolivet /*@C 270e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 27119fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 272e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 273e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 274e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 275e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 276e4a0ef16SKarl Rupp 277d083f849SBarry Smith Collective 278e4a0ef16SKarl Rupp 279e4a0ef16SKarl Rupp Input Parameters: 280e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 281e4a0ef16SKarl Rupp . m - number of rows 282e4a0ef16SKarl Rupp . n - number of columns 283e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 284e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 285e4a0ef16SKarl Rupp (possibly different for each row) or NULL 286e4a0ef16SKarl Rupp 287e4a0ef16SKarl Rupp Output Parameter: 288e4a0ef16SKarl Rupp . A - the matrix 289e4a0ef16SKarl Rupp 290e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 291e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 292e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 293e4a0ef16SKarl Rupp 294e4a0ef16SKarl Rupp Notes: 295e4a0ef16SKarl Rupp If nnz is given then nz is ignored 296e4a0ef16SKarl Rupp 297e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 298e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 299e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 300e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 301e4a0ef16SKarl Rupp 302e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 303e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 304e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 305e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 306e4a0ef16SKarl Rupp 307e4a0ef16SKarl Rupp Level: intermediate 308e4a0ef16SKarl Rupp 309e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 310e4a0ef16SKarl Rupp 311e4a0ef16SKarl Rupp @*/ 312e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 313e4a0ef16SKarl Rupp { 314e4a0ef16SKarl Rupp PetscErrorCode ierr; 315e4a0ef16SKarl Rupp 316e4a0ef16SKarl Rupp PetscFunctionBegin; 317e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 318e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 319e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 320e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 321e4a0ef16SKarl Rupp PetscFunctionReturn(0); 322e4a0ef16SKarl Rupp } 323e4a0ef16SKarl Rupp 324e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 325e4a0ef16SKarl Rupp { 326e4a0ef16SKarl Rupp PetscErrorCode ierr; 327e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 328e4a0ef16SKarl Rupp 329e4a0ef16SKarl Rupp PetscFunctionBegin; 330e4a0ef16SKarl Rupp try { 3316447cd05SKarl Rupp if (viennaclcontainer) { 3326447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3336447cd05SKarl Rupp delete viennaclcontainer->mat; 3346447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 335e4a0ef16SKarl Rupp delete viennaclcontainer; 3366447cd05SKarl Rupp } 337c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 3384076e183SKarl Rupp } catch(std::exception const & ex) { 3394076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 340e4a0ef16SKarl Rupp } 3418713a8baSPatrick Sanan 3428713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 3434222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL);CHKERRQ(ierr); 3444222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL);CHKERRQ(ierr); 3458713a8baSPatrick Sanan 346e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 347e4a0ef16SKarl Rupp A->spptr = 0; 348e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 349e4a0ef16SKarl Rupp PetscFunctionReturn(0); 350e4a0ef16SKarl Rupp } 351e4a0ef16SKarl Rupp 352e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 353e4a0ef16SKarl Rupp { 354e4a0ef16SKarl Rupp PetscErrorCode ierr; 355e4a0ef16SKarl Rupp 356e4a0ef16SKarl Rupp PetscFunctionBegin; 357e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 3581e1ea65dSPierre Jolivet ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 3598713a8baSPatrick Sanan PetscFunctionReturn(0); 3608713a8baSPatrick Sanan } 3618713a8baSPatrick Sanan 362b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool); 363c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 364c3cca76eSKarl Rupp { 365c3cca76eSKarl Rupp PetscErrorCode ierr; 366c3cca76eSKarl Rupp Mat C; 367c3cca76eSKarl Rupp 368c3cca76eSKarl Rupp PetscFunctionBegin; 369c3cca76eSKarl Rupp ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 370c3cca76eSKarl Rupp C = *B; 371c3cca76eSKarl Rupp 372b470e4b4SRichard Tran Mills ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 37392f9df4aSRichard Tran Mills C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 374c3cca76eSKarl Rupp 375c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 376c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 377c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 378c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 379c3cca76eSKarl Rupp 380c3cca76eSKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 381c3cca76eSKarl Rupp 382c70f7ee4SJunchao Zhang C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 383c3cca76eSKarl Rupp 384c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 385c3cca76eSKarl Rupp if (C->assembled) { 386c3cca76eSKarl Rupp ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 387c3cca76eSKarl Rupp } 388c3cca76eSKarl Rupp 389c3cca76eSKarl Rupp PetscFunctionReturn(0); 390c3cca76eSKarl Rupp } 391c3cca76eSKarl Rupp 392f38c1e66SStefano Zampini static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 393f38c1e66SStefano Zampini { 394f38c1e66SStefano Zampini PetscErrorCode ierr; 395f38c1e66SStefano Zampini 396f38c1e66SStefano Zampini PetscFunctionBegin; 397f38c1e66SStefano Zampini ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 398*c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ*)A->data)->a; 399f38c1e66SStefano Zampini PetscFunctionReturn(0); 400f38c1e66SStefano Zampini } 401f38c1e66SStefano Zampini 402f38c1e66SStefano Zampini static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 403f38c1e66SStefano Zampini { 404f38c1e66SStefano Zampini PetscFunctionBegin; 405*c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 406*c1a4f3baSJunchao Zhang *array = NULL; 407*c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 408*c1a4f3baSJunchao Zhang } 409*c1a4f3baSJunchao Zhang 410*c1a4f3baSJunchao Zhang static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 411*c1a4f3baSJunchao Zhang { 412*c1a4f3baSJunchao Zhang PetscErrorCode ierr; 413*c1a4f3baSJunchao Zhang 414*c1a4f3baSJunchao Zhang PetscFunctionBegin; 415*c1a4f3baSJunchao Zhang ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 416*c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ*)A->data)->a; 417*c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 418*c1a4f3baSJunchao Zhang } 419*c1a4f3baSJunchao Zhang 420*c1a4f3baSJunchao Zhang static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[]) 421*c1a4f3baSJunchao Zhang { 422*c1a4f3baSJunchao Zhang PetscFunctionBegin; 423*c1a4f3baSJunchao Zhang *array = NULL; 424*c1a4f3baSJunchao Zhang /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 425*c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 426*c1a4f3baSJunchao Zhang } 427*c1a4f3baSJunchao Zhang 428*c1a4f3baSJunchao Zhang static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 429*c1a4f3baSJunchao Zhang { 430*c1a4f3baSJunchao Zhang PetscFunctionBegin; 431*c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ*)A->data)->a; 432*c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 433*c1a4f3baSJunchao Zhang } 434*c1a4f3baSJunchao Zhang 435*c1a4f3baSJunchao Zhang static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[]) 436*c1a4f3baSJunchao Zhang { 437*c1a4f3baSJunchao Zhang PetscFunctionBegin; 438*c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 439f38c1e66SStefano Zampini *array = NULL; 440f38c1e66SStefano Zampini PetscFunctionReturn(0); 441f38c1e66SStefano Zampini } 442f38c1e66SStefano Zampini 443b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 444e7e92044SBarry Smith { 445*c1a4f3baSJunchao Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 446f38c1e66SStefano Zampini PetscErrorCode ierr; 447f38c1e66SStefano Zampini 448e7e92044SBarry Smith PetscFunctionBegin; 449b470e4b4SRichard Tran Mills A->boundtocpu = flg; 450*c1a4f3baSJunchao Zhang if (flg && a->inode.size) { 451*c1a4f3baSJunchao Zhang a->inode.use = PETSC_TRUE; 452ea500dcfSRichard Tran Mills } else { 453*c1a4f3baSJunchao Zhang a->inode.use = PETSC_FALSE; 454ea500dcfSRichard Tran Mills } 455e7e92044SBarry Smith if (flg) { 456f38c1e66SStefano Zampini /* make sure we have an up-to-date copy on the CPU */ 457f38c1e66SStefano Zampini ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr); 458e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 459e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 460e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 461e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 462*c1a4f3baSJunchao Zhang ierr = PetscMemzero(a->ops,sizeof(Mat_SeqAIJOps));CHKERRQ(ierr); 463e7e92044SBarry Smith } else { 464e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 465e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 466e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 467e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 468e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 469*c1a4f3baSJunchao Zhang 470*c1a4f3baSJunchao Zhang a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 471*c1a4f3baSJunchao Zhang a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 472*c1a4f3baSJunchao Zhang a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 473*c1a4f3baSJunchao Zhang a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 474*c1a4f3baSJunchao Zhang a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 475*c1a4f3baSJunchao Zhang a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 476e7e92044SBarry Smith } 477e7e92044SBarry Smith PetscFunctionReturn(0); 478e7e92044SBarry Smith } 479e7e92044SBarry Smith 4808713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 4818713a8baSPatrick Sanan { 4828713a8baSPatrick Sanan PetscErrorCode ierr; 4838713a8baSPatrick Sanan Mat B; 4848713a8baSPatrick Sanan 4858713a8baSPatrick Sanan PetscFunctionBegin; 4868713a8baSPatrick Sanan 487ea13f565SStefano Zampini if (reuse == MAT_REUSE_MATRIX) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4888713a8baSPatrick Sanan 4898713a8baSPatrick Sanan if (reuse == MAT_INITIAL_MATRIX) { 4908713a8baSPatrick Sanan ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 4918713a8baSPatrick Sanan } 4928713a8baSPatrick Sanan 4938713a8baSPatrick Sanan B = *newmat; 4948713a8baSPatrick Sanan 495e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 496e4a0ef16SKarl Rupp 497a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 498a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 499a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 500e4a0ef16SKarl Rupp 501b470e4b4SRichard Tran Mills ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 50292f9df4aSRichard Tran Mills A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 503e4a0ef16SKarl Rupp 504e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 50534136279SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 50634136279SStefano Zampini ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 507e4a0ef16SKarl Rupp 5088713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 5094222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense);CHKERRQ(ierr); 5104222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ);CHKERRQ(ierr); 5118713a8baSPatrick Sanan 512c70f7ee4SJunchao Zhang B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 5138713a8baSPatrick Sanan 5148713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 5158713a8baSPatrick Sanan if (B->assembled) { 5168713a8baSPatrick Sanan ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 5178713a8baSPatrick Sanan } 5188713a8baSPatrick Sanan 519e4a0ef16SKarl Rupp PetscFunctionReturn(0); 520e4a0ef16SKarl Rupp } 521e4a0ef16SKarl Rupp 5223ca39a21SBarry Smith /*MC 523e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 524e4a0ef16SKarl Rupp 525e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 526e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 527e4a0ef16SKarl Rupp 528e4a0ef16SKarl Rupp Options Database Keys: 529e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 530e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 531e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 532e4a0ef16SKarl Rupp 533e4a0ef16SKarl Rupp Level: beginner 534e4a0ef16SKarl Rupp 535e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 536e4a0ef16SKarl Rupp M*/ 537e4a0ef16SKarl Rupp 5383ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 53972367587SKarl Rupp { 54072367587SKarl Rupp PetscErrorCode ierr; 54172367587SKarl Rupp 54272367587SKarl Rupp PetscFunctionBegin; 5433ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 5443ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 5453ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 5463ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 54772367587SKarl Rupp PetscFunctionReturn(0); 54872367587SKarl Rupp } 549