1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp 3e4a0ef16SKarl Rupp /* 4e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 5e4a0ef16SKarl Rupp matrix storage format. 6e4a0ef16SKarl Rupp */ 7e4a0ef16SKarl Rupp 8aaa7dc30SBarry Smith #include <petscconf.h> 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp 17e4a0ef16SKarl Rupp #include <algorithm> 18e4a0ef16SKarl Rupp #include <vector> 19e4a0ef16SKarl Rupp #include <string> 20e4a0ef16SKarl Rupp 21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 22e4a0ef16SKarl Rupp 238713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2472367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 2572367587SKarl Rupp 268713a8baSPatrick Sanan 27e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28e4a0ef16SKarl Rupp { 29e4a0ef16SKarl Rupp 30e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32e4a0ef16SKarl Rupp PetscErrorCode ierr; 33e4a0ef16SKarl Rupp 34e4a0ef16SKarl Rupp PetscFunctionBegin; 35bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 37e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38e4a0ef16SKarl Rupp 39e4a0ef16SKarl Rupp try { 40e4a0ef16SKarl Rupp if (a->compressedrow.use) { 41a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42e4a0ef16SKarl Rupp 43a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 44a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 45e4a0ef16SKarl Rupp 46a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47a3430c56SKarl Rupp for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48a3430c56SKarl Rupp row_buffer.set(i, (a->compressedrow.i)[i]); 49e4a0ef16SKarl Rupp 50a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51a3430c56SKarl Rupp for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52a3430c56SKarl Rupp row_indices.set(i, (a->compressedrow.rindex)[i]); 53a3430c56SKarl Rupp 54a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55a3430c56SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 56a3430c56SKarl Rupp col_buffer.set(i, (a->j)[i]); 57a3430c56SKarl Rupp 58a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59*4863603aSSatish Balay ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 60e4a0ef16SKarl Rupp } else { 61a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62e4a0ef16SKarl Rupp 63e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 64e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 65e4a0ef16SKarl Rupp 66e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 68e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 69e4a0ef16SKarl Rupp 70e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 72e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 73e4a0ef16SKarl Rupp 74e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75*4863603aSSatish Balay ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr); 76e4a0ef16SKarl Rupp } 774cf1874eSKarl Rupp ViennaCLWaitForGPU(); 784076e183SKarl Rupp } catch(std::exception const & ex) { 794076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 80e4a0ef16SKarl Rupp } 81e4a0ef16SKarl Rupp 82a3430c56SKarl Rupp // Create temporary vector for v += A*x: 83a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 849b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 85a3430c56SKarl Rupp delete (ViennaCLVector*)viennaclstruct->tempvec; 869b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 87a3430c56SKarl Rupp } else { 88a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 89a3430c56SKarl Rupp } 90a3430c56SKarl Rupp } else { 919b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 92a3430c56SKarl Rupp } 93a3430c56SKarl Rupp 94b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 95e4a0ef16SKarl Rupp 96e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 97e4a0ef16SKarl Rupp } 9867c87b7fSKarl Rupp } 99e4a0ef16SKarl Rupp PetscFunctionReturn(0); 100e4a0ef16SKarl Rupp } 101e4a0ef16SKarl Rupp 1020d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103e4a0ef16SKarl Rupp { 104e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 106e4a0ef16SKarl Rupp PetscErrorCode ierr; 107e4a0ef16SKarl Rupp 108e4a0ef16SKarl Rupp 109e4a0ef16SKarl Rupp PetscFunctionBegin; 110b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 111e4a0ef16SKarl Rupp try { 1126c4ed002SBarry Smith if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1136c4ed002SBarry Smith else { 114e4a0ef16SKarl Rupp 115e4a0ef16SKarl Rupp if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 116e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 117e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 118e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 119e4a0ef16SKarl Rupp if (a->singlemalloc) { 120e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 121e4a0ef16SKarl Rupp } else { 122e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 123e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 124e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 125e4a0ef16SKarl Rupp } 126dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 127f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 128e4a0ef16SKarl Rupp 129e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 130e4a0ef16SKarl Rupp 131e4a0ef16SKarl Rupp /* Setup row lengths */ 132e4a0ef16SKarl Rupp if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 133dcca6d9dSJed Brown ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 134f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 135e4a0ef16SKarl Rupp 136e4a0ef16SKarl Rupp /* Copy data back from GPU */ 137e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 138e4a0ef16SKarl Rupp 139e4a0ef16SKarl Rupp // copy row array 140e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 141e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 142e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 143e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 144e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 145e4a0ef16SKarl Rupp } 146e4a0ef16SKarl Rupp 147e4a0ef16SKarl Rupp // copy column indices 148e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 149e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 150e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 151e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 152e4a0ef16SKarl Rupp 153e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 154e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 155e4a0ef16SKarl Rupp 156*4863603aSSatish Balay ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr); 1574cf1874eSKarl Rupp ViennaCLWaitForGPU(); 158023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 159e4a0ef16SKarl Rupp } 1604076e183SKarl Rupp } catch(std::exception const & ex) { 1614076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 162e4a0ef16SKarl Rupp } 163e4a0ef16SKarl Rupp 164b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 165e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167e4a0ef16SKarl Rupp 168b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 1696c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 170e4a0ef16SKarl Rupp PetscFunctionReturn(0); 171e4a0ef16SKarl Rupp } 172e4a0ef16SKarl Rupp 173e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 174e4a0ef16SKarl Rupp { 175e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 176e4a0ef16SKarl Rupp PetscErrorCode ierr; 177e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 1780d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 1790d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 180e4a0ef16SKarl Rupp 181e4a0ef16SKarl Rupp PetscFunctionBegin; 182bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 183e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 184e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 185e4a0ef16SKarl Rupp try { 186b7832b47SStefano Zampini if (a->compressedrow.use) { 187b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 188b7832b47SStefano Zampini } else { 189e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 190b7832b47SStefano Zampini } 1914cf1874eSKarl Rupp ViennaCLWaitForGPU(); 1924076e183SKarl Rupp } catch (std::exception const & ex) { 1934076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 194e4a0ef16SKarl Rupp } 195e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 196e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 1979b66742cSDave May ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 198bf1781e8SStefano Zampini } else { 199bf1781e8SStefano Zampini ierr = VecSet(yy,0);CHKERRQ(ierr); 20067c87b7fSKarl Rupp } 201e4a0ef16SKarl Rupp PetscFunctionReturn(0); 202e4a0ef16SKarl Rupp } 203e4a0ef16SKarl Rupp 204e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 205e4a0ef16SKarl Rupp { 206e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 207e4a0ef16SKarl Rupp PetscErrorCode ierr; 208e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2090d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2100d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 211e4a0ef16SKarl Rupp 212e4a0ef16SKarl Rupp PetscFunctionBegin; 213bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 214e4a0ef16SKarl Rupp try { 215e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 216e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 217e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 218e4a0ef16SKarl Rupp 219e4a0ef16SKarl Rupp if (a->compressedrow.use) { 220a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 221e4a0ef16SKarl Rupp *zgpu = *ygpu + temp; 2224cf1874eSKarl Rupp ViennaCLWaitForGPU(); 223e4a0ef16SKarl Rupp } else { 224a3430c56SKarl Rupp if (zz == xx || zz == yy) { //temporary required 225a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 226a3430c56SKarl Rupp *zgpu = *ygpu; 227a3430c56SKarl Rupp *zgpu += temp; 228a3430c56SKarl Rupp ViennaCLWaitForGPU(); 229a3430c56SKarl Rupp } else { 230a3430c56SKarl Rupp *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 231a3430c56SKarl Rupp *zgpu = *ygpu + *viennaclstruct->tempvec; 2324cf1874eSKarl Rupp ViennaCLWaitForGPU(); 233e4a0ef16SKarl Rupp } 234e4a0ef16SKarl Rupp } 235e4a0ef16SKarl Rupp 236e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 237e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 238e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 239e4a0ef16SKarl Rupp 2404076e183SKarl Rupp } catch(std::exception const & ex) { 2414076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 242e4a0ef16SKarl Rupp } 243e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 244bf1781e8SStefano Zampini } else { 245bf1781e8SStefano Zampini ierr = VecCopy(yy,zz);CHKERRQ(ierr); 24667c87b7fSKarl Rupp } 247e4a0ef16SKarl Rupp PetscFunctionReturn(0); 248e4a0ef16SKarl Rupp } 249e4a0ef16SKarl Rupp 250e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 251e4a0ef16SKarl Rupp { 252e4a0ef16SKarl Rupp PetscErrorCode ierr; 253e4a0ef16SKarl Rupp 254e4a0ef16SKarl Rupp PetscFunctionBegin; 255e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 256e7e92044SBarry Smith if (!A->pinnedtocpu) { 257e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 258e7e92044SBarry Smith } 259e4a0ef16SKarl Rupp PetscFunctionReturn(0); 260e4a0ef16SKarl Rupp } 261e4a0ef16SKarl Rupp 262e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 263e4a0ef16SKarl Rupp /*@ 264e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 26519fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 266e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 267e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 268e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 269e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 270e4a0ef16SKarl Rupp 271e4a0ef16SKarl Rupp 272d083f849SBarry Smith Collective 273e4a0ef16SKarl Rupp 274e4a0ef16SKarl Rupp Input Parameters: 275e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 276e4a0ef16SKarl Rupp . m - number of rows 277e4a0ef16SKarl Rupp . n - number of columns 278e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 279e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 280e4a0ef16SKarl Rupp (possibly different for each row) or NULL 281e4a0ef16SKarl Rupp 282e4a0ef16SKarl Rupp Output Parameter: 283e4a0ef16SKarl Rupp . A - the matrix 284e4a0ef16SKarl Rupp 285e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 286e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 287e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 288e4a0ef16SKarl Rupp 289e4a0ef16SKarl Rupp Notes: 290e4a0ef16SKarl Rupp If nnz is given then nz is ignored 291e4a0ef16SKarl Rupp 292e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 293e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 294e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 295e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 296e4a0ef16SKarl Rupp 297e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 298e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 299e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 300e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 301e4a0ef16SKarl Rupp 302e4a0ef16SKarl Rupp Level: intermediate 303e4a0ef16SKarl Rupp 304e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 305e4a0ef16SKarl Rupp 306e4a0ef16SKarl Rupp @*/ 307e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 308e4a0ef16SKarl Rupp { 309e4a0ef16SKarl Rupp PetscErrorCode ierr; 310e4a0ef16SKarl Rupp 311e4a0ef16SKarl Rupp PetscFunctionBegin; 312e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 313e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 314e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 315e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 316e4a0ef16SKarl Rupp PetscFunctionReturn(0); 317e4a0ef16SKarl Rupp } 318e4a0ef16SKarl Rupp 319e4a0ef16SKarl Rupp 320e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 321e4a0ef16SKarl Rupp { 322e4a0ef16SKarl Rupp PetscErrorCode ierr; 323e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 324e4a0ef16SKarl Rupp 325e4a0ef16SKarl Rupp PetscFunctionBegin; 326e4a0ef16SKarl Rupp try { 3276447cd05SKarl Rupp if (viennaclcontainer) { 3286447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3296447cd05SKarl Rupp delete viennaclcontainer->mat; 3306447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 331e4a0ef16SKarl Rupp delete viennaclcontainer; 3326447cd05SKarl Rupp } 333b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 3344076e183SKarl Rupp } catch(std::exception const & ex) { 3354076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 336e4a0ef16SKarl Rupp } 3378713a8baSPatrick Sanan 3388713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 3398713a8baSPatrick Sanan 340e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 341e4a0ef16SKarl Rupp A->spptr = 0; 342e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 343e4a0ef16SKarl Rupp PetscFunctionReturn(0); 344e4a0ef16SKarl Rupp } 345e4a0ef16SKarl Rupp 346e4a0ef16SKarl Rupp 347e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 348e4a0ef16SKarl Rupp { 349e4a0ef16SKarl Rupp PetscErrorCode ierr; 350e4a0ef16SKarl Rupp 351e4a0ef16SKarl Rupp PetscFunctionBegin; 352e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 3538713a8baSPatrick Sanan ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 3548713a8baSPatrick Sanan PetscFunctionReturn(0); 3558713a8baSPatrick Sanan } 3568713a8baSPatrick Sanan 357e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 358c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 359c3cca76eSKarl Rupp { 360c3cca76eSKarl Rupp PetscErrorCode ierr; 361c3cca76eSKarl Rupp Mat C; 362c3cca76eSKarl Rupp 363c3cca76eSKarl Rupp PetscFunctionBegin; 364c3cca76eSKarl Rupp ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 365c3cca76eSKarl Rupp C = *B; 366c3cca76eSKarl Rupp 367e7e92044SBarry Smith ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 368e7e92044SBarry Smith C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 369c3cca76eSKarl Rupp 370c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 371c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 372c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 373c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 374c3cca76eSKarl Rupp 375c3cca76eSKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 376c3cca76eSKarl Rupp 377b8ced49eSKarl Rupp C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 378c3cca76eSKarl Rupp 379c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 380c3cca76eSKarl Rupp if (C->assembled) { 381c3cca76eSKarl Rupp ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 382c3cca76eSKarl Rupp } 383c3cca76eSKarl Rupp 384c3cca76eSKarl Rupp PetscFunctionReturn(0); 385c3cca76eSKarl Rupp } 386c3cca76eSKarl Rupp 387e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 388e7e92044SBarry Smith { 389e7e92044SBarry Smith PetscFunctionBegin; 390e7e92044SBarry Smith A->pinnedtocpu = flg; 391e7e92044SBarry Smith if (flg) { 392e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 393e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 394e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 395e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJ; 396e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 397e7e92044SBarry Smith } else { 398e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 399e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 400e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 401e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 402e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 403e7e92044SBarry Smith } 404e7e92044SBarry Smith PetscFunctionReturn(0); 405e7e92044SBarry Smith } 406e7e92044SBarry Smith 4078713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 4088713a8baSPatrick Sanan { 4098713a8baSPatrick Sanan PetscErrorCode ierr; 4108713a8baSPatrick Sanan Mat B; 4118713a8baSPatrick Sanan Mat_SeqAIJ *aij; 4128713a8baSPatrick Sanan 4138713a8baSPatrick Sanan PetscFunctionBegin; 4148713a8baSPatrick Sanan 4158713a8baSPatrick Sanan if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4168713a8baSPatrick Sanan 4178713a8baSPatrick Sanan if (reuse == MAT_INITIAL_MATRIX) { 4188713a8baSPatrick Sanan ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 4198713a8baSPatrick Sanan } 4208713a8baSPatrick Sanan 4218713a8baSPatrick Sanan B = *newmat; 4228713a8baSPatrick Sanan 423e4a0ef16SKarl Rupp aij = (Mat_SeqAIJ*)B->data; 424e4a0ef16SKarl Rupp aij->inode.use = PETSC_FALSE; 4258713a8baSPatrick Sanan 426e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 427e4a0ef16SKarl Rupp 428a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 429a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 430a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 431e4a0ef16SKarl Rupp 432e7e92044SBarry Smith ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 433e7e92044SBarry Smith A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 434e4a0ef16SKarl Rupp 435e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 43634136279SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 43734136279SStefano Zampini ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 438e4a0ef16SKarl Rupp 4398713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 4408713a8baSPatrick Sanan 441b8ced49eSKarl Rupp B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 4428713a8baSPatrick Sanan 4438713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4448713a8baSPatrick Sanan if (B->assembled) { 4458713a8baSPatrick Sanan ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 4468713a8baSPatrick Sanan } 4478713a8baSPatrick Sanan 448e4a0ef16SKarl Rupp PetscFunctionReturn(0); 449e4a0ef16SKarl Rupp } 450e4a0ef16SKarl Rupp 451e4a0ef16SKarl Rupp 4523ca39a21SBarry Smith /*MC 453e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 454e4a0ef16SKarl Rupp 455e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 456e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 457e4a0ef16SKarl Rupp 458e4a0ef16SKarl Rupp Options Database Keys: 459e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 460e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 461e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 462e4a0ef16SKarl Rupp 463e4a0ef16SKarl Rupp Level: beginner 464e4a0ef16SKarl Rupp 465e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 466e4a0ef16SKarl Rupp M*/ 467e4a0ef16SKarl Rupp 46872367587SKarl Rupp 4693ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 47072367587SKarl Rupp { 47172367587SKarl Rupp PetscErrorCode ierr; 47272367587SKarl Rupp 47372367587SKarl Rupp PetscFunctionBegin; 4743ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4753ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4763ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4773ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 47872367587SKarl Rupp PetscFunctionReturn(0); 47972367587SKarl Rupp } 480