1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp 3e4a0ef16SKarl Rupp /* 4e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 5e4a0ef16SKarl Rupp matrix storage format. 6e4a0ef16SKarl Rupp */ 7e4a0ef16SKarl Rupp 8aaa7dc30SBarry Smith #include <petscconf.h> 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp 17e4a0ef16SKarl Rupp #include <algorithm> 18e4a0ef16SKarl Rupp #include <vector> 19e4a0ef16SKarl Rupp #include <string> 20e4a0ef16SKarl Rupp 21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 22e4a0ef16SKarl Rupp 238713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2472367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 2572367587SKarl Rupp 268713a8baSPatrick Sanan 27e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28e4a0ef16SKarl Rupp { 29e4a0ef16SKarl Rupp 30e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32e4a0ef16SKarl Rupp PetscErrorCode ierr; 33e4a0ef16SKarl Rupp 34e4a0ef16SKarl Rupp PetscFunctionBegin; 35bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 36b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 37e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38e4a0ef16SKarl Rupp 39e4a0ef16SKarl Rupp try { 40e4a0ef16SKarl Rupp if (a->compressedrow.use) { 41a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42e4a0ef16SKarl Rupp 43a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 44a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 45e4a0ef16SKarl Rupp 46a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47a3430c56SKarl Rupp for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48a3430c56SKarl Rupp row_buffer.set(i, (a->compressedrow.i)[i]); 49e4a0ef16SKarl Rupp 50a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51a3430c56SKarl Rupp for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52a3430c56SKarl Rupp row_indices.set(i, (a->compressedrow.rindex)[i]); 53a3430c56SKarl Rupp 54a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55a3430c56SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 56a3430c56SKarl Rupp col_buffer.set(i, (a->j)[i]); 57a3430c56SKarl Rupp 58a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59e4a0ef16SKarl Rupp } else { 60a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 61e4a0ef16SKarl Rupp 62e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 63e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 64e4a0ef16SKarl Rupp 65e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 66e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 67e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 68e4a0ef16SKarl Rupp 69e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 70e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 71e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 72e4a0ef16SKarl Rupp 73e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 74e4a0ef16SKarl Rupp } 754cf1874eSKarl Rupp ViennaCLWaitForGPU(); 764076e183SKarl Rupp } catch(std::exception const & ex) { 774076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 78e4a0ef16SKarl Rupp } 79e4a0ef16SKarl Rupp 80a3430c56SKarl Rupp // Create temporary vector for v += A*x: 81a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 829b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 83a3430c56SKarl Rupp delete (ViennaCLVector*)viennaclstruct->tempvec; 849b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 85a3430c56SKarl Rupp } else { 86a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 87a3430c56SKarl Rupp } 88a3430c56SKarl Rupp } else { 899b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 90a3430c56SKarl Rupp } 91a3430c56SKarl Rupp 92b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 93e4a0ef16SKarl Rupp 94e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 95e4a0ef16SKarl Rupp } 9667c87b7fSKarl Rupp } 97e4a0ef16SKarl Rupp PetscFunctionReturn(0); 98e4a0ef16SKarl Rupp } 99e4a0ef16SKarl Rupp 1000d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 101e4a0ef16SKarl Rupp { 102e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 103e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 104e4a0ef16SKarl Rupp PetscErrorCode ierr; 105e4a0ef16SKarl Rupp 106e4a0ef16SKarl Rupp 107e4a0ef16SKarl Rupp PetscFunctionBegin; 108b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 109e4a0ef16SKarl Rupp try { 1106c4ed002SBarry Smith if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1116c4ed002SBarry Smith else { 112e4a0ef16SKarl Rupp 113e4a0ef16SKarl Rupp if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 114e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 115e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 116e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 117e4a0ef16SKarl Rupp if (a->singlemalloc) { 118e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 119e4a0ef16SKarl Rupp } else { 120e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 121e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 122e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 123e4a0ef16SKarl Rupp } 124dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 125f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 126e4a0ef16SKarl Rupp 127e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 128e4a0ef16SKarl Rupp 129e4a0ef16SKarl Rupp /* Setup row lengths */ 130e4a0ef16SKarl Rupp if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 131dcca6d9dSJed Brown ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 132f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 133e4a0ef16SKarl Rupp 134e4a0ef16SKarl Rupp /* Copy data back from GPU */ 135e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 136e4a0ef16SKarl Rupp 137e4a0ef16SKarl Rupp // copy row array 138e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 139e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 140e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 141e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 142e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 143e4a0ef16SKarl Rupp } 144e4a0ef16SKarl Rupp 145e4a0ef16SKarl Rupp // copy column indices 146e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 147e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 148e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 149e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 150e4a0ef16SKarl Rupp 151e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 152e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 153e4a0ef16SKarl Rupp 1544cf1874eSKarl Rupp ViennaCLWaitForGPU(); 155023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 156e4a0ef16SKarl Rupp } 1574076e183SKarl Rupp } catch(std::exception const & ex) { 1584076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 159e4a0ef16SKarl Rupp } 160e4a0ef16SKarl Rupp 161b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 162e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 163e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 164e4a0ef16SKarl Rupp 165b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 1666c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 167e4a0ef16SKarl Rupp PetscFunctionReturn(0); 168e4a0ef16SKarl Rupp } 169e4a0ef16SKarl Rupp 170e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 171e4a0ef16SKarl Rupp { 172e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 173e4a0ef16SKarl Rupp PetscErrorCode ierr; 174e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 1750d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 1760d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 177e4a0ef16SKarl Rupp 178e4a0ef16SKarl Rupp PetscFunctionBegin; 179bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 180e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 181e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 182e4a0ef16SKarl Rupp try { 183b7832b47SStefano Zampini if (a->compressedrow.use) { 184b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 185b7832b47SStefano Zampini } else { 186e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 187b7832b47SStefano Zampini } 1884cf1874eSKarl Rupp ViennaCLWaitForGPU(); 1894076e183SKarl Rupp } catch (std::exception const & ex) { 1904076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 191e4a0ef16SKarl Rupp } 192e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 193e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 1949b66742cSDave May ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 195bf1781e8SStefano Zampini } else { 196bf1781e8SStefano Zampini ierr = VecSet(yy,0);CHKERRQ(ierr); 19767c87b7fSKarl Rupp } 198e4a0ef16SKarl Rupp PetscFunctionReturn(0); 199e4a0ef16SKarl Rupp } 200e4a0ef16SKarl Rupp 201e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 202e4a0ef16SKarl Rupp { 203e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 204e4a0ef16SKarl Rupp PetscErrorCode ierr; 205e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2060d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2070d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 208e4a0ef16SKarl Rupp 209e4a0ef16SKarl Rupp PetscFunctionBegin; 210bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 211e4a0ef16SKarl Rupp try { 212e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 213e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 214e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 215e4a0ef16SKarl Rupp 216e4a0ef16SKarl Rupp if (a->compressedrow.use) { 217a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 218e4a0ef16SKarl Rupp *zgpu = *ygpu + temp; 2194cf1874eSKarl Rupp ViennaCLWaitForGPU(); 220e4a0ef16SKarl Rupp } else { 221a3430c56SKarl Rupp if (zz == xx || zz == yy) { //temporary required 222a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 223a3430c56SKarl Rupp *zgpu = *ygpu; 224a3430c56SKarl Rupp *zgpu += temp; 225a3430c56SKarl Rupp ViennaCLWaitForGPU(); 226a3430c56SKarl Rupp } else { 227a3430c56SKarl Rupp *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 228a3430c56SKarl Rupp *zgpu = *ygpu + *viennaclstruct->tempvec; 2294cf1874eSKarl Rupp ViennaCLWaitForGPU(); 230e4a0ef16SKarl Rupp } 231e4a0ef16SKarl Rupp } 232e4a0ef16SKarl Rupp 233e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 234e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 235e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 236e4a0ef16SKarl Rupp 2374076e183SKarl Rupp } catch(std::exception const & ex) { 2384076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 239e4a0ef16SKarl Rupp } 240e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 241bf1781e8SStefano Zampini } else { 242bf1781e8SStefano Zampini ierr = VecCopy(yy,zz);CHKERRQ(ierr); 24367c87b7fSKarl Rupp } 244e4a0ef16SKarl Rupp PetscFunctionReturn(0); 245e4a0ef16SKarl Rupp } 246e4a0ef16SKarl Rupp 247e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 248e4a0ef16SKarl Rupp { 249e4a0ef16SKarl Rupp PetscErrorCode ierr; 250e4a0ef16SKarl Rupp 251e4a0ef16SKarl Rupp PetscFunctionBegin; 252e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 253*e7e92044SBarry Smith if (!A->pinnedtocpu) { 254e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 255*e7e92044SBarry Smith } 256e4a0ef16SKarl Rupp PetscFunctionReturn(0); 257e4a0ef16SKarl Rupp } 258e4a0ef16SKarl Rupp 259e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 260e4a0ef16SKarl Rupp /*@ 261e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 26219fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 263e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 264e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 265e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 266e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 267e4a0ef16SKarl Rupp 268e4a0ef16SKarl Rupp 269e4a0ef16SKarl Rupp Collective on MPI_Comm 270e4a0ef16SKarl Rupp 271e4a0ef16SKarl Rupp Input Parameters: 272e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 273e4a0ef16SKarl Rupp . m - number of rows 274e4a0ef16SKarl Rupp . n - number of columns 275e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 276e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 277e4a0ef16SKarl Rupp (possibly different for each row) or NULL 278e4a0ef16SKarl Rupp 279e4a0ef16SKarl Rupp Output Parameter: 280e4a0ef16SKarl Rupp . A - the matrix 281e4a0ef16SKarl Rupp 282e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 283e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 284e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 285e4a0ef16SKarl Rupp 286e4a0ef16SKarl Rupp Notes: 287e4a0ef16SKarl Rupp If nnz is given then nz is ignored 288e4a0ef16SKarl Rupp 289e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 290e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 291e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 292e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 293e4a0ef16SKarl Rupp 294e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 295e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 296e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 297e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 298e4a0ef16SKarl Rupp 299e4a0ef16SKarl Rupp Level: intermediate 300e4a0ef16SKarl Rupp 301e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 302e4a0ef16SKarl Rupp 303e4a0ef16SKarl Rupp @*/ 304e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 305e4a0ef16SKarl Rupp { 306e4a0ef16SKarl Rupp PetscErrorCode ierr; 307e4a0ef16SKarl Rupp 308e4a0ef16SKarl Rupp PetscFunctionBegin; 309e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 310e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 311e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 312e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 313e4a0ef16SKarl Rupp PetscFunctionReturn(0); 314e4a0ef16SKarl Rupp } 315e4a0ef16SKarl Rupp 316e4a0ef16SKarl Rupp 317e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 318e4a0ef16SKarl Rupp { 319e4a0ef16SKarl Rupp PetscErrorCode ierr; 320e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 321e4a0ef16SKarl Rupp 322e4a0ef16SKarl Rupp PetscFunctionBegin; 323e4a0ef16SKarl Rupp try { 3246447cd05SKarl Rupp if (viennaclcontainer) { 3256447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3266447cd05SKarl Rupp delete viennaclcontainer->mat; 3276447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 328e4a0ef16SKarl Rupp delete viennaclcontainer; 3296447cd05SKarl Rupp } 330b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 3314076e183SKarl Rupp } catch(std::exception const & ex) { 3324076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 333e4a0ef16SKarl Rupp } 3348713a8baSPatrick Sanan 3358713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 3368713a8baSPatrick Sanan 337e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 338e4a0ef16SKarl Rupp A->spptr = 0; 339e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 340e4a0ef16SKarl Rupp PetscFunctionReturn(0); 341e4a0ef16SKarl Rupp } 342e4a0ef16SKarl Rupp 343e4a0ef16SKarl Rupp 344e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 345e4a0ef16SKarl Rupp { 346e4a0ef16SKarl Rupp PetscErrorCode ierr; 347e4a0ef16SKarl Rupp 348e4a0ef16SKarl Rupp PetscFunctionBegin; 349e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 3508713a8baSPatrick Sanan ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 3518713a8baSPatrick Sanan PetscFunctionReturn(0); 3528713a8baSPatrick Sanan } 3538713a8baSPatrick Sanan 354*e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool); 355c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 356c3cca76eSKarl Rupp { 357c3cca76eSKarl Rupp PetscErrorCode ierr; 358c3cca76eSKarl Rupp Mat C; 359c3cca76eSKarl Rupp 360c3cca76eSKarl Rupp PetscFunctionBegin; 361c3cca76eSKarl Rupp ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 362c3cca76eSKarl Rupp C = *B; 363c3cca76eSKarl Rupp 364*e7e92044SBarry Smith ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 365*e7e92044SBarry Smith C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 366c3cca76eSKarl Rupp 367c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 368c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 369c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 370c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 371c3cca76eSKarl Rupp 372c3cca76eSKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 373c3cca76eSKarl Rupp 374b8ced49eSKarl Rupp C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 375c3cca76eSKarl Rupp 376c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 377c3cca76eSKarl Rupp if (C->assembled) { 378c3cca76eSKarl Rupp ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 379c3cca76eSKarl Rupp } 380c3cca76eSKarl Rupp 381c3cca76eSKarl Rupp PetscFunctionReturn(0); 382c3cca76eSKarl Rupp } 383c3cca76eSKarl Rupp 384*e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg) 385*e7e92044SBarry Smith { 386*e7e92044SBarry Smith PetscFunctionBegin; 387*e7e92044SBarry Smith A->pinnedtocpu = flg; 388*e7e92044SBarry Smith if (flg) { 389*e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 390*e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 391*e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 392*e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJ; 393*e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 394*e7e92044SBarry Smith } else { 395*e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 396*e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 397*e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 398*e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 399*e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 400*e7e92044SBarry Smith } 401*e7e92044SBarry Smith PetscFunctionReturn(0); 402*e7e92044SBarry Smith } 403*e7e92044SBarry Smith 4048713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 4058713a8baSPatrick Sanan { 4068713a8baSPatrick Sanan PetscErrorCode ierr; 4078713a8baSPatrick Sanan Mat B; 4088713a8baSPatrick Sanan Mat_SeqAIJ *aij; 4098713a8baSPatrick Sanan 4108713a8baSPatrick Sanan PetscFunctionBegin; 4118713a8baSPatrick Sanan 4128713a8baSPatrick Sanan if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4138713a8baSPatrick Sanan 4148713a8baSPatrick Sanan if (reuse == MAT_INITIAL_MATRIX) { 4158713a8baSPatrick Sanan ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 4168713a8baSPatrick Sanan } 4178713a8baSPatrick Sanan 4188713a8baSPatrick Sanan B = *newmat; 4198713a8baSPatrick Sanan 420e4a0ef16SKarl Rupp aij = (Mat_SeqAIJ*)B->data; 421e4a0ef16SKarl Rupp aij->inode.use = PETSC_FALSE; 4228713a8baSPatrick Sanan 423e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 424e4a0ef16SKarl Rupp 425a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 426a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 427a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 428e4a0ef16SKarl Rupp 429*e7e92044SBarry Smith ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr); 430*e7e92044SBarry Smith A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL; 431e4a0ef16SKarl Rupp 432e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 43334136279SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 43434136279SStefano Zampini ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 435e4a0ef16SKarl Rupp 4368713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 4378713a8baSPatrick Sanan 438b8ced49eSKarl Rupp B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 4398713a8baSPatrick Sanan 4408713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4418713a8baSPatrick Sanan if (B->assembled) { 4428713a8baSPatrick Sanan ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 4438713a8baSPatrick Sanan } 4448713a8baSPatrick Sanan 445e4a0ef16SKarl Rupp PetscFunctionReturn(0); 446e4a0ef16SKarl Rupp } 447e4a0ef16SKarl Rupp 448e4a0ef16SKarl Rupp 4493ca39a21SBarry Smith /*MC 450e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 451e4a0ef16SKarl Rupp 452e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 453e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 454e4a0ef16SKarl Rupp 455e4a0ef16SKarl Rupp Options Database Keys: 456e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 457e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 458e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 459e4a0ef16SKarl Rupp 460e4a0ef16SKarl Rupp Level: beginner 461e4a0ef16SKarl Rupp 462e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 463e4a0ef16SKarl Rupp M*/ 464e4a0ef16SKarl Rupp 46572367587SKarl Rupp 4663ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 46772367587SKarl Rupp { 46872367587SKarl Rupp PetscErrorCode ierr; 46972367587SKarl Rupp 47072367587SKarl Rupp PetscFunctionBegin; 4713ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4723ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4733ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4743ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 47572367587SKarl Rupp PetscFunctionReturn(0); 47672367587SKarl Rupp } 477