1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp 3e4a0ef16SKarl Rupp /* 4e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 5e4a0ef16SKarl Rupp matrix storage format. 6e4a0ef16SKarl Rupp */ 7e4a0ef16SKarl Rupp 8aaa7dc30SBarry Smith #include <petscconf.h> 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12aaa7dc30SBarry Smith #include <petsc-private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp 17e4a0ef16SKarl Rupp #include <algorithm> 18e4a0ef16SKarl Rupp #include <vector> 19e4a0ef16SKarl Rupp #include <string> 20e4a0ef16SKarl Rupp 21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 22e4a0ef16SKarl Rupp 23e4a0ef16SKarl Rupp #undef __FUNCT__ 24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU" 25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26e4a0ef16SKarl Rupp { 27e4a0ef16SKarl Rupp 28e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30e4a0ef16SKarl Rupp PetscErrorCode ierr; 31e4a0ef16SKarl Rupp 32e4a0ef16SKarl Rupp 33e4a0ef16SKarl Rupp PetscFunctionBegin; 3467c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0 35e4a0ef16SKarl Rupp if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) { 36e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 37e4a0ef16SKarl Rupp 38e4a0ef16SKarl Rupp try { 391fc5b511SKarl Rupp ierr = PetscObjectSetFromOptions_ViennaCL((PetscObject)A);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */ 40e4a0ef16SKarl Rupp if (a->compressedrow.use) { 41*a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 42e4a0ef16SKarl Rupp 43*a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 44*a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 45e4a0ef16SKarl Rupp 46*a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1); 47*a3430c56SKarl Rupp for (PetscInt i=0; i<=a->compressedrow.nrows; ++i) 48*a3430c56SKarl Rupp row_buffer.set(i, (a->compressedrow.i)[i]); 49e4a0ef16SKarl Rupp 50*a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows); 51*a3430c56SKarl Rupp for (PetscInt i=0; i<a->compressedrow.nrows; ++i) 52*a3430c56SKarl Rupp row_indices.set(i, (a->compressedrow.rindex)[i]); 53*a3430c56SKarl Rupp 54*a3430c56SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 55*a3430c56SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 56*a3430c56SKarl Rupp col_buffer.set(i, (a->j)[i]); 57*a3430c56SKarl Rupp 58*a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 59e4a0ef16SKarl Rupp } else { 60*a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 61e4a0ef16SKarl Rupp 62e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 63e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 64e4a0ef16SKarl Rupp 65e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 66e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 67e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 68e4a0ef16SKarl Rupp 69e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 70e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 71e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 72e4a0ef16SKarl Rupp 73e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 74e4a0ef16SKarl Rupp } 754cf1874eSKarl Rupp ViennaCLWaitForGPU(); 764076e183SKarl Rupp } catch(std::exception const & ex) { 774076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 78e4a0ef16SKarl Rupp } 79e4a0ef16SKarl Rupp 80*a3430c56SKarl Rupp // Create temporary vector for v += A*x: 81*a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 82*a3430c56SKarl Rupp if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(a->nz)) { 83*a3430c56SKarl Rupp delete (ViennaCLVector*)viennaclstruct->tempvec; 84*a3430c56SKarl Rupp viennaclstruct->tempvec = new ViennaCLVector(a->nz); 85*a3430c56SKarl Rupp } else { 86*a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 87*a3430c56SKarl Rupp } 88*a3430c56SKarl Rupp } else { 89*a3430c56SKarl Rupp viennaclstruct->tempvec = new ViennaCLVector(a->nz); 90*a3430c56SKarl Rupp } 91*a3430c56SKarl Rupp 92e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 93e4a0ef16SKarl Rupp 94e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 95e4a0ef16SKarl Rupp } 9667c87b7fSKarl Rupp } 97e4a0ef16SKarl Rupp PetscFunctionReturn(0); 98e4a0ef16SKarl Rupp } 99e4a0ef16SKarl Rupp 100e4a0ef16SKarl Rupp #undef __FUNCT__ 101e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU" 1020d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 103e4a0ef16SKarl Rupp { 104e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 105e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 106e4a0ef16SKarl Rupp PetscErrorCode ierr; 107e4a0ef16SKarl Rupp 108e4a0ef16SKarl Rupp 109e4a0ef16SKarl Rupp PetscFunctionBegin; 110e4a0ef16SKarl Rupp if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) { 111e4a0ef16SKarl Rupp try { 112e4a0ef16SKarl Rupp if (a->compressedrow.use) { 113e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 114e4a0ef16SKarl Rupp } else { 115e4a0ef16SKarl Rupp 116e4a0ef16SKarl Rupp if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 117e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 118e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 119e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 120e4a0ef16SKarl Rupp if (a->singlemalloc) { 121e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 122e4a0ef16SKarl Rupp } else { 123e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 124e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 125e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 126e4a0ef16SKarl Rupp } 127dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 128f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 129e4a0ef16SKarl Rupp 130e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 131e4a0ef16SKarl Rupp 132e4a0ef16SKarl Rupp /* Setup row lengths */ 133e4a0ef16SKarl Rupp if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 134dcca6d9dSJed Brown ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 135f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 136e4a0ef16SKarl Rupp 137e4a0ef16SKarl Rupp /* Copy data back from GPU */ 138e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 139e4a0ef16SKarl Rupp 140e4a0ef16SKarl Rupp // copy row array 141e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 142e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 143e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 144e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 145e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 146e4a0ef16SKarl Rupp } 147e4a0ef16SKarl Rupp 148e4a0ef16SKarl Rupp // copy column indices 149e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 150e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 151e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 152e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 153e4a0ef16SKarl Rupp 154e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 155e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 156e4a0ef16SKarl Rupp 1574cf1874eSKarl Rupp ViennaCLWaitForGPU(); 158023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 159e4a0ef16SKarl Rupp } 1604076e183SKarl Rupp } catch(std::exception const & ex) { 1614076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 162e4a0ef16SKarl Rupp } 163e4a0ef16SKarl Rupp 164e4a0ef16SKarl Rupp /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */ 165e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167e4a0ef16SKarl Rupp 168e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 169e4a0ef16SKarl Rupp } else { 170e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 171e4a0ef16SKarl Rupp } 172e4a0ef16SKarl Rupp PetscFunctionReturn(0); 173e4a0ef16SKarl Rupp } 174e4a0ef16SKarl Rupp 175e4a0ef16SKarl Rupp #undef __FUNCT__ 176e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL" 177e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left) 178e4a0ef16SKarl Rupp { 179e4a0ef16SKarl Rupp PetscErrorCode ierr; 180e4a0ef16SKarl Rupp 181e4a0ef16SKarl Rupp PetscFunctionBegin; 182e4a0ef16SKarl Rupp if (right) { 183e4a0ef16SKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr); 184e4a0ef16SKarl Rupp ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 185e4a0ef16SKarl Rupp ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 186e4a0ef16SKarl Rupp ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr); 187e4a0ef16SKarl Rupp ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 188e4a0ef16SKarl Rupp } 189e4a0ef16SKarl Rupp if (left) { 190e4a0ef16SKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr); 191e4a0ef16SKarl Rupp ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 192e4a0ef16SKarl Rupp ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 193e4a0ef16SKarl Rupp ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr); 194e4a0ef16SKarl Rupp ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 195e4a0ef16SKarl Rupp } 196e4a0ef16SKarl Rupp PetscFunctionReturn(0); 197e4a0ef16SKarl Rupp } 198e4a0ef16SKarl Rupp 199e4a0ef16SKarl Rupp #undef __FUNCT__ 200e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL" 201e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 202e4a0ef16SKarl Rupp { 203e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 204e4a0ef16SKarl Rupp PetscErrorCode ierr; 205e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2060d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 2070d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 208e4a0ef16SKarl Rupp 209e4a0ef16SKarl Rupp PetscFunctionBegin; 21067c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 211e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 212e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 213e4a0ef16SKarl Rupp try { 214e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 2154cf1874eSKarl Rupp ViennaCLWaitForGPU(); 2164076e183SKarl Rupp } catch (std::exception const & ex) { 2174076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 218e4a0ef16SKarl Rupp } 219e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 220e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 221e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr); 22267c87b7fSKarl Rupp } 223e4a0ef16SKarl Rupp PetscFunctionReturn(0); 224e4a0ef16SKarl Rupp } 225e4a0ef16SKarl Rupp 226e4a0ef16SKarl Rupp 227e4a0ef16SKarl Rupp 228e4a0ef16SKarl Rupp #undef __FUNCT__ 229e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL" 230e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 231e4a0ef16SKarl Rupp { 232e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 233e4a0ef16SKarl Rupp PetscErrorCode ierr; 234e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2350d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2360d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 237e4a0ef16SKarl Rupp 238e4a0ef16SKarl Rupp PetscFunctionBegin; 23967c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 240e4a0ef16SKarl Rupp try { 241e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 242e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 243e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 244e4a0ef16SKarl Rupp 245e4a0ef16SKarl Rupp if (a->compressedrow.use) { 246*a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 247e4a0ef16SKarl Rupp *zgpu = *ygpu + temp; 2484cf1874eSKarl Rupp ViennaCLWaitForGPU(); 249e4a0ef16SKarl Rupp } else { 250*a3430c56SKarl Rupp if (zz == xx || zz == yy) { //temporary required 251*a3430c56SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 252*a3430c56SKarl Rupp *zgpu = *ygpu; 253*a3430c56SKarl Rupp *zgpu += temp; 254*a3430c56SKarl Rupp ViennaCLWaitForGPU(); 255*a3430c56SKarl Rupp } else { 256*a3430c56SKarl Rupp *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 257*a3430c56SKarl Rupp *zgpu = *ygpu + *viennaclstruct->tempvec; 2584cf1874eSKarl Rupp ViennaCLWaitForGPU(); 259e4a0ef16SKarl Rupp } 260e4a0ef16SKarl Rupp } 261e4a0ef16SKarl Rupp 262e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 263e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 264e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 265e4a0ef16SKarl Rupp 2664076e183SKarl Rupp } catch(std::exception const & ex) { 2674076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 268e4a0ef16SKarl Rupp } 269e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 27067c87b7fSKarl Rupp } 271e4a0ef16SKarl Rupp PetscFunctionReturn(0); 272e4a0ef16SKarl Rupp } 273e4a0ef16SKarl Rupp 274e4a0ef16SKarl Rupp #undef __FUNCT__ 275e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL" 276e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 277e4a0ef16SKarl Rupp { 278e4a0ef16SKarl Rupp PetscErrorCode ierr; 279e4a0ef16SKarl Rupp 280e4a0ef16SKarl Rupp PetscFunctionBegin; 281e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 282e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 283e4a0ef16SKarl Rupp if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 284e4a0ef16SKarl Rupp A->ops->mult = MatMult_SeqAIJViennaCL; 285e4a0ef16SKarl Rupp A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 286e4a0ef16SKarl Rupp PetscFunctionReturn(0); 287e4a0ef16SKarl Rupp } 288e4a0ef16SKarl Rupp 289e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 290e4a0ef16SKarl Rupp #undef __FUNCT__ 291e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL" 292e4a0ef16SKarl Rupp /*@ 293e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 29419fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 295e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 296e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 297e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 298e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 299e4a0ef16SKarl Rupp 300e4a0ef16SKarl Rupp 301e4a0ef16SKarl Rupp Collective on MPI_Comm 302e4a0ef16SKarl Rupp 303e4a0ef16SKarl Rupp Input Parameters: 304e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 305e4a0ef16SKarl Rupp . m - number of rows 306e4a0ef16SKarl Rupp . n - number of columns 307e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 308e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 309e4a0ef16SKarl Rupp (possibly different for each row) or NULL 310e4a0ef16SKarl Rupp 311e4a0ef16SKarl Rupp Output Parameter: 312e4a0ef16SKarl Rupp . A - the matrix 313e4a0ef16SKarl Rupp 314e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 315e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 316e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 317e4a0ef16SKarl Rupp 318e4a0ef16SKarl Rupp Notes: 319e4a0ef16SKarl Rupp If nnz is given then nz is ignored 320e4a0ef16SKarl Rupp 321e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 322e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 323e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 324e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 325e4a0ef16SKarl Rupp 326e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 327e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 328e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 329e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 330e4a0ef16SKarl Rupp 331e4a0ef16SKarl Rupp Level: intermediate 332e4a0ef16SKarl Rupp 333e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 334e4a0ef16SKarl Rupp 335e4a0ef16SKarl Rupp @*/ 336e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 337e4a0ef16SKarl Rupp { 338e4a0ef16SKarl Rupp PetscErrorCode ierr; 339e4a0ef16SKarl Rupp 340e4a0ef16SKarl Rupp PetscFunctionBegin; 341e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 342e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 343e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 344e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 345e4a0ef16SKarl Rupp PetscFunctionReturn(0); 346e4a0ef16SKarl Rupp } 347e4a0ef16SKarl Rupp 348e4a0ef16SKarl Rupp 349e4a0ef16SKarl Rupp #undef __FUNCT__ 350e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL" 351e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 352e4a0ef16SKarl Rupp { 353e4a0ef16SKarl Rupp PetscErrorCode ierr; 354e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 355e4a0ef16SKarl Rupp 356e4a0ef16SKarl Rupp PetscFunctionBegin; 357e4a0ef16SKarl Rupp try { 358*a3430c56SKarl Rupp if (!viennaclcontainer->tempvec) delete viennaclcontainer->tempvec; 359*a3430c56SKarl Rupp if (!viennaclcontainer->mat) delete viennaclcontainer->mat; 360*a3430c56SKarl Rupp if (!viennaclcontainer->compressed_mat) delete viennaclcontainer->compressed_mat; 361e4a0ef16SKarl Rupp delete viennaclcontainer; 362e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 3634076e183SKarl Rupp } catch(std::exception const & ex) { 3644076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 365e4a0ef16SKarl Rupp } 366e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 367e4a0ef16SKarl Rupp A->spptr = 0; 368e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 369e4a0ef16SKarl Rupp PetscFunctionReturn(0); 370e4a0ef16SKarl Rupp } 371e4a0ef16SKarl Rupp 372e4a0ef16SKarl Rupp 373e4a0ef16SKarl Rupp #undef __FUNCT__ 374e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL" 375e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 376e4a0ef16SKarl Rupp { 377e4a0ef16SKarl Rupp PetscErrorCode ierr; 378e4a0ef16SKarl Rupp Mat_SeqAIJ *aij; 379e4a0ef16SKarl Rupp 380e4a0ef16SKarl Rupp PetscFunctionBegin; 381e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 382e4a0ef16SKarl Rupp aij = (Mat_SeqAIJ*)B->data; 383e4a0ef16SKarl Rupp aij->inode.use = PETSC_FALSE; 384e4a0ef16SKarl Rupp B->ops->mult = MatMult_SeqAIJViennaCL; 385e4a0ef16SKarl Rupp B->ops->multadd = MatMultAdd_SeqAIJViennaCL; 386e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 387e4a0ef16SKarl Rupp 388*a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec = NULL; 389*a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = NULL; 390*a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL; 391e4a0ef16SKarl Rupp 392e4a0ef16SKarl Rupp B->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 393e4a0ef16SKarl Rupp B->ops->destroy = MatDestroy_SeqAIJViennaCL; 394e4a0ef16SKarl Rupp B->ops->getvecs = MatGetVecs_SeqAIJViennaCL; 395e4a0ef16SKarl Rupp 396e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 397e4a0ef16SKarl Rupp 398e4a0ef16SKarl Rupp B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 399e4a0ef16SKarl Rupp PetscFunctionReturn(0); 400e4a0ef16SKarl Rupp } 401e4a0ef16SKarl Rupp 402e4a0ef16SKarl Rupp 403e4a0ef16SKarl Rupp /*M 404e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 405e4a0ef16SKarl Rupp 406e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 407e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 408e4a0ef16SKarl Rupp 409e4a0ef16SKarl Rupp Options Database Keys: 410e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 411e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 412e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 413e4a0ef16SKarl Rupp 414e4a0ef16SKarl Rupp Level: beginner 415e4a0ef16SKarl Rupp 416e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 417e4a0ef16SKarl Rupp M*/ 418e4a0ef16SKarl Rupp 419