1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp 3e4a0ef16SKarl Rupp /* 4e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 5e4a0ef16SKarl Rupp matrix storage format. 6e4a0ef16SKarl Rupp */ 7e4a0ef16SKarl Rupp 8aaa7dc30SBarry Smith #include <petscconf.h> 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp 17e4a0ef16SKarl Rupp #include <algorithm> 18e4a0ef16SKarl Rupp #include <vector> 19e4a0ef16SKarl Rupp #include <string> 20e4a0ef16SKarl Rupp 21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 22e4a0ef16SKarl Rupp 238713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2472367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*); 2572367587SKarl Rupp 268713a8baSPatrick Sanan 27e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 28e4a0ef16SKarl Rupp { 29e4a0ef16SKarl Rupp 30e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 31e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32e4a0ef16SKarl Rupp PetscErrorCode ierr; 33e4a0ef16SKarl Rupp 34e4a0ef16SKarl Rupp 35e4a0ef16SKarl Rupp PetscFunctionBegin; 3667c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0 37b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) { 38e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 39e4a0ef16SKarl Rupp 40e4a0ef16SKarl Rupp try { 41e4a0ef16SKarl Rupp if (a->compressedrow.use) { 42a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 43e4a0ef16SKarl Rupp 44a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 45a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 46e4a0ef16SKarl Rupp 47a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 48a3430c56SKarl Rupp for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 49a3430c56SKarl Rupp row_buffer.set(i, (a->compressedrow.i)[i]); 50e4a0ef16SKarl Rupp 51a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 52a3430c56SKarl Rupp for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 53a3430c56SKarl Rupp row_indices.set(i, (a->compressedrow.rindex)[i]); 54a3430c56SKarl Rupp 55a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 56a3430c56SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 57a3430c56SKarl Rupp col_buffer.set(i, (a->j)[i]); 58a3430c56SKarl Rupp 59a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 60e4a0ef16SKarl Rupp } else { 61a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 62e4a0ef16SKarl Rupp 63e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 64e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 65e4a0ef16SKarl Rupp 66e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 67e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 68e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 69e4a0ef16SKarl Rupp 70e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 71e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 72e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 73e4a0ef16SKarl Rupp 74e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 75e4a0ef16SKarl Rupp } 764cf1874eSKarl Rupp ViennaCLWaitForGPU(); 774076e183SKarl Rupp } catch(std::exception const & ex) { 784076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 79e4a0ef16SKarl Rupp } 80e4a0ef16SKarl Rupp 81a3430c56SKarl Rupp // Create temporary vector for v += A*x: 82a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 839b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 84a3430c56SKarl Rupp delete (ViennaCLVector*)viennaclstruct->tempvec; 859b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86a3430c56SKarl Rupp } else { 87a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 88a3430c56SKarl Rupp } 89a3430c56SKarl Rupp } else { 909b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 91a3430c56SKarl Rupp } 92a3430c56SKarl Rupp 93b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 94e4a0ef16SKarl Rupp 95e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 96e4a0ef16SKarl Rupp } 9767c87b7fSKarl Rupp } 98e4a0ef16SKarl Rupp PetscFunctionReturn(0); 99e4a0ef16SKarl Rupp } 100e4a0ef16SKarl Rupp 1010d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 102e4a0ef16SKarl Rupp { 103e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 104e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 105e4a0ef16SKarl Rupp PetscErrorCode ierr; 106e4a0ef16SKarl Rupp 107e4a0ef16SKarl Rupp 108e4a0ef16SKarl Rupp PetscFunctionBegin; 109b8ced49eSKarl Rupp if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) { 110e4a0ef16SKarl Rupp try { 1116c4ed002SBarry Smith if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1126c4ed002SBarry Smith else { 113e4a0ef16SKarl Rupp 114e4a0ef16SKarl Rupp if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 115e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 116e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 117e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 118e4a0ef16SKarl Rupp if (a->singlemalloc) { 119e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 120e4a0ef16SKarl Rupp } else { 121e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 122e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 123e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 124e4a0ef16SKarl Rupp } 125dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 126f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 127e4a0ef16SKarl Rupp 128e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 129e4a0ef16SKarl Rupp 130e4a0ef16SKarl Rupp /* Setup row lengths */ 131e4a0ef16SKarl Rupp if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 132dcca6d9dSJed Brown ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 133f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 134e4a0ef16SKarl Rupp 135e4a0ef16SKarl Rupp /* Copy data back from GPU */ 136e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 137e4a0ef16SKarl Rupp 138e4a0ef16SKarl Rupp // copy row array 139e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 140e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 141e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 142e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 143e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 144e4a0ef16SKarl Rupp } 145e4a0ef16SKarl Rupp 146e4a0ef16SKarl Rupp // copy column indices 147e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 148e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 149e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 150e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 151e4a0ef16SKarl Rupp 152e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 153e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 154e4a0ef16SKarl Rupp 1554cf1874eSKarl Rupp ViennaCLWaitForGPU(); 156023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 157e4a0ef16SKarl Rupp } 1584076e183SKarl Rupp } catch(std::exception const & ex) { 1594076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 160e4a0ef16SKarl Rupp } 161e4a0ef16SKarl Rupp 162b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 163e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 164e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 165e4a0ef16SKarl Rupp 166b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 1676c4ed002SBarry Smith } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 168e4a0ef16SKarl Rupp PetscFunctionReturn(0); 169e4a0ef16SKarl Rupp } 170e4a0ef16SKarl Rupp 171e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 172e4a0ef16SKarl Rupp { 173e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 174e4a0ef16SKarl Rupp PetscErrorCode ierr; 175e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 1760d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 1770d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 178e4a0ef16SKarl Rupp 179e4a0ef16SKarl Rupp PetscFunctionBegin; 18067c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 181e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 182e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 183e4a0ef16SKarl Rupp try { 184e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 1854cf1874eSKarl Rupp ViennaCLWaitForGPU(); 1864076e183SKarl Rupp } catch (std::exception const & ex) { 1874076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 188e4a0ef16SKarl Rupp } 189e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 190e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 1919b66742cSDave May ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr); 19267c87b7fSKarl Rupp } 193e4a0ef16SKarl Rupp PetscFunctionReturn(0); 194e4a0ef16SKarl Rupp } 195e4a0ef16SKarl Rupp 196e4a0ef16SKarl Rupp 197e4a0ef16SKarl Rupp 198e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 199e4a0ef16SKarl Rupp { 200e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 201e4a0ef16SKarl Rupp PetscErrorCode ierr; 202e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2030d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2040d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 205e4a0ef16SKarl Rupp 206e4a0ef16SKarl Rupp PetscFunctionBegin; 20767c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 208e4a0ef16SKarl Rupp try { 209e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 210e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 211e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 212e4a0ef16SKarl Rupp 213e4a0ef16SKarl Rupp if (a->compressedrow.use) { 214a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 215e4a0ef16SKarl Rupp *zgpu = *ygpu + temp; 2164cf1874eSKarl Rupp ViennaCLWaitForGPU(); 217e4a0ef16SKarl Rupp } else { 218a3430c56SKarl Rupp if (zz == xx || zz == yy) { //temporary required 219a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 220a3430c56SKarl Rupp *zgpu = *ygpu; 221a3430c56SKarl Rupp *zgpu += temp; 222a3430c56SKarl Rupp ViennaCLWaitForGPU(); 223a3430c56SKarl Rupp } else { 224a3430c56SKarl Rupp *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 225a3430c56SKarl Rupp *zgpu = *ygpu + *viennaclstruct->tempvec; 2264cf1874eSKarl Rupp ViennaCLWaitForGPU(); 227e4a0ef16SKarl Rupp } 228e4a0ef16SKarl Rupp } 229e4a0ef16SKarl Rupp 230e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 231e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 232e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 233e4a0ef16SKarl Rupp 2344076e183SKarl Rupp } catch(std::exception const & ex) { 2354076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 236e4a0ef16SKarl Rupp } 237e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 23867c87b7fSKarl Rupp } 239e4a0ef16SKarl Rupp PetscFunctionReturn(0); 240e4a0ef16SKarl Rupp } 241e4a0ef16SKarl Rupp 242e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 243e4a0ef16SKarl Rupp { 244e4a0ef16SKarl Rupp PetscErrorCode ierr; 245e4a0ef16SKarl Rupp 246e4a0ef16SKarl Rupp PetscFunctionBegin; 247e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 248e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 249e4a0ef16SKarl Rupp PetscFunctionReturn(0); 250e4a0ef16SKarl Rupp } 251e4a0ef16SKarl Rupp 252e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 253e4a0ef16SKarl Rupp /*@ 254e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 25519fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 256e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 257e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 258e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 259e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 260e4a0ef16SKarl Rupp 261e4a0ef16SKarl Rupp 262e4a0ef16SKarl Rupp Collective on MPI_Comm 263e4a0ef16SKarl Rupp 264e4a0ef16SKarl Rupp Input Parameters: 265e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 266e4a0ef16SKarl Rupp . m - number of rows 267e4a0ef16SKarl Rupp . n - number of columns 268e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 269e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 270e4a0ef16SKarl Rupp (possibly different for each row) or NULL 271e4a0ef16SKarl Rupp 272e4a0ef16SKarl Rupp Output Parameter: 273e4a0ef16SKarl Rupp . A - the matrix 274e4a0ef16SKarl Rupp 275e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 276e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 277e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 278e4a0ef16SKarl Rupp 279e4a0ef16SKarl Rupp Notes: 280e4a0ef16SKarl Rupp If nnz is given then nz is ignored 281e4a0ef16SKarl Rupp 282e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 283e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 284e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 285e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 286e4a0ef16SKarl Rupp 287e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 288e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 289e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 290e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 291e4a0ef16SKarl Rupp 292e4a0ef16SKarl Rupp Level: intermediate 293e4a0ef16SKarl Rupp 294e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 295e4a0ef16SKarl Rupp 296e4a0ef16SKarl Rupp @*/ 297e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 298e4a0ef16SKarl Rupp { 299e4a0ef16SKarl Rupp PetscErrorCode ierr; 300e4a0ef16SKarl Rupp 301e4a0ef16SKarl Rupp PetscFunctionBegin; 302e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 303e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 304e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 305e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 306e4a0ef16SKarl Rupp PetscFunctionReturn(0); 307e4a0ef16SKarl Rupp } 308e4a0ef16SKarl Rupp 309e4a0ef16SKarl Rupp 310e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 311e4a0ef16SKarl Rupp { 312e4a0ef16SKarl Rupp PetscErrorCode ierr; 313e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 314e4a0ef16SKarl Rupp 315e4a0ef16SKarl Rupp PetscFunctionBegin; 316e4a0ef16SKarl Rupp try { 3176447cd05SKarl Rupp if (viennaclcontainer) { 3186447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3196447cd05SKarl Rupp delete viennaclcontainer->mat; 3206447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 321e4a0ef16SKarl Rupp delete viennaclcontainer; 3226447cd05SKarl Rupp } 323b8ced49eSKarl Rupp A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 3244076e183SKarl Rupp } catch(std::exception const & ex) { 3254076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 326e4a0ef16SKarl Rupp } 3278713a8baSPatrick Sanan 3288713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr); 3298713a8baSPatrick Sanan 330e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 331e4a0ef16SKarl Rupp A->spptr = 0; 332e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 333e4a0ef16SKarl Rupp PetscFunctionReturn(0); 334e4a0ef16SKarl Rupp } 335e4a0ef16SKarl Rupp 336e4a0ef16SKarl Rupp 337e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 338e4a0ef16SKarl Rupp { 339e4a0ef16SKarl Rupp PetscErrorCode ierr; 340e4a0ef16SKarl Rupp 341e4a0ef16SKarl Rupp PetscFunctionBegin; 342e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 3438713a8baSPatrick Sanan ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B); 3448713a8baSPatrick Sanan PetscFunctionReturn(0); 3458713a8baSPatrick Sanan } 3468713a8baSPatrick Sanan 347c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B) 348c3cca76eSKarl Rupp { 349c3cca76eSKarl Rupp PetscErrorCode ierr; 350c3cca76eSKarl Rupp Mat C; 351c3cca76eSKarl Rupp 352c3cca76eSKarl Rupp PetscFunctionBegin; 353c3cca76eSKarl Rupp ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 354c3cca76eSKarl Rupp C = *B; 355c3cca76eSKarl Rupp 356c3cca76eSKarl Rupp C->ops->mult = MatMult_SeqAIJViennaCL; 357c3cca76eSKarl Rupp C->ops->multadd = MatMultAdd_SeqAIJViennaCL; 358c3cca76eSKarl Rupp C->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 359c3cca76eSKarl Rupp C->ops->destroy = MatDestroy_SeqAIJViennaCL; 360c3cca76eSKarl Rupp C->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 361c3cca76eSKarl Rupp 362c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 363c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec = NULL; 364c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->mat = NULL; 365c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL; 366c3cca76eSKarl Rupp 367c3cca76eSKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr); 368c3cca76eSKarl Rupp 369b8ced49eSKarl Rupp C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 370c3cca76eSKarl Rupp 371c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 372c3cca76eSKarl Rupp if (C->assembled) { 373c3cca76eSKarl Rupp ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr); 374c3cca76eSKarl Rupp } 375c3cca76eSKarl Rupp 376c3cca76eSKarl Rupp PetscFunctionReturn(0); 377c3cca76eSKarl Rupp } 378c3cca76eSKarl Rupp 3798713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 3808713a8baSPatrick Sanan { 3818713a8baSPatrick Sanan PetscErrorCode ierr; 3828713a8baSPatrick Sanan Mat B; 3838713a8baSPatrick Sanan Mat_SeqAIJ *aij; 3848713a8baSPatrick Sanan 3858713a8baSPatrick Sanan PetscFunctionBegin; 3868713a8baSPatrick Sanan 3878713a8baSPatrick Sanan if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 3888713a8baSPatrick Sanan 3898713a8baSPatrick Sanan if (reuse == MAT_INITIAL_MATRIX) { 3908713a8baSPatrick Sanan ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 3918713a8baSPatrick Sanan } 3928713a8baSPatrick Sanan 3938713a8baSPatrick Sanan B = *newmat; 3948713a8baSPatrick Sanan 395e4a0ef16SKarl Rupp aij = (Mat_SeqAIJ*)B->data; 396e4a0ef16SKarl Rupp aij->inode.use = PETSC_FALSE; 3978713a8baSPatrick Sanan 398e4a0ef16SKarl Rupp B->ops->mult = MatMult_SeqAIJViennaCL; 399e4a0ef16SKarl Rupp B->ops->multadd = MatMultAdd_SeqAIJViennaCL; 400e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 401e4a0ef16SKarl Rupp 402a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 403a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 404a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 405e4a0ef16SKarl Rupp 406e4a0ef16SKarl Rupp B->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 407e4a0ef16SKarl Rupp B->ops->destroy = MatDestroy_SeqAIJViennaCL; 408c3cca76eSKarl Rupp B->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 409e4a0ef16SKarl Rupp 410e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 411*34136279SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 412*34136279SStefano Zampini ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr); 413e4a0ef16SKarl Rupp 4148713a8baSPatrick Sanan ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr); 4158713a8baSPatrick Sanan 416b8ced49eSKarl Rupp B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED; 4178713a8baSPatrick Sanan 4188713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4198713a8baSPatrick Sanan if (B->assembled) { 4208713a8baSPatrick Sanan ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr); 4218713a8baSPatrick Sanan } 4228713a8baSPatrick Sanan 423e4a0ef16SKarl Rupp PetscFunctionReturn(0); 424e4a0ef16SKarl Rupp } 425e4a0ef16SKarl Rupp 426e4a0ef16SKarl Rupp 4273ca39a21SBarry Smith /*MC 428e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 429e4a0ef16SKarl Rupp 430e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 431e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 432e4a0ef16SKarl Rupp 433e4a0ef16SKarl Rupp Options Database Keys: 434e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 435e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 436e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 437e4a0ef16SKarl Rupp 438e4a0ef16SKarl Rupp Level: beginner 439e4a0ef16SKarl Rupp 440e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 441e4a0ef16SKarl Rupp M*/ 442e4a0ef16SKarl Rupp 44372367587SKarl Rupp 4443ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 44572367587SKarl Rupp { 44672367587SKarl Rupp PetscErrorCode ierr; 44772367587SKarl Rupp 44872367587SKarl Rupp PetscFunctionBegin; 4493ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4503ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4513ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 4523ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr); 45372367587SKarl Rupp PetscFunctionReturn(0); 45472367587SKarl Rupp } 455