1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp 3e4a0ef16SKarl Rupp /* 4e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 5e4a0ef16SKarl Rupp matrix storage format. 6e4a0ef16SKarl Rupp */ 7e4a0ef16SKarl Rupp 8e4a0ef16SKarl Rupp #include "petscconf.h" 9e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 10e4a0ef16SKarl Rupp #include "petscbt.h" 11e4a0ef16SKarl Rupp #include "../src/vec/vec/impls/dvecimpl.h" 12e4a0ef16SKarl Rupp #include "petsc-private/vecimpl.h" 13e4a0ef16SKarl Rupp 14e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h" 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp 17e4a0ef16SKarl Rupp #include <algorithm> 18e4a0ef16SKarl Rupp #include <vector> 19e4a0ef16SKarl Rupp #include <string> 20e4a0ef16SKarl Rupp 21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 22e4a0ef16SKarl Rupp 23e4a0ef16SKarl Rupp #undef __FUNCT__ 24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU" 25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26e4a0ef16SKarl Rupp { 27e4a0ef16SKarl Rupp 28e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30e4a0ef16SKarl Rupp PetscInt *ii; 31e4a0ef16SKarl Rupp PetscErrorCode ierr; 32e4a0ef16SKarl Rupp 33e4a0ef16SKarl Rupp 34e4a0ef16SKarl Rupp PetscFunctionBegin; 3567c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0 36e4a0ef16SKarl Rupp if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) { 37e4a0ef16SKarl Rupp ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38e4a0ef16SKarl Rupp 39e4a0ef16SKarl Rupp try { 401fc5b511SKarl Rupp ierr = PetscObjectSetFromOptions_ViennaCL((PetscObject)A);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */ 41e4a0ef16SKarl Rupp viennaclstruct->mat = new ViennaCLAIJMatrix(); 42e4a0ef16SKarl Rupp if (a->compressedrow.use) { 43e4a0ef16SKarl Rupp ii = a->compressedrow.i; 44e4a0ef16SKarl Rupp 45e4a0ef16SKarl Rupp viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz); 46e4a0ef16SKarl Rupp 47e4a0ef16SKarl Rupp // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications) 48023073b3SKarl Rupp // Cannot be reasonably supported in ViennaCL 1.4.x (custom kernels required), hence postponing until there is support for compressed CSR 49e4a0ef16SKarl Rupp 50e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported"); 51e4a0ef16SKarl Rupp } else { 52e4a0ef16SKarl Rupp 53e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 54e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 55e4a0ef16SKarl Rupp 56e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 57e4a0ef16SKarl Rupp for (PetscInt i=0; i<=A->rmap->n; ++i) 58e4a0ef16SKarl Rupp row_buffer.set(i, (a->i)[i]); 59e4a0ef16SKarl Rupp 60e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 61e4a0ef16SKarl Rupp for (PetscInt i=0; i<a->nz; ++i) 62e4a0ef16SKarl Rupp col_buffer.set(i, (a->j)[i]); 63e4a0ef16SKarl Rupp 64e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 65e4a0ef16SKarl Rupp } 664cf1874eSKarl Rupp ViennaCLWaitForGPU(); 674076e183SKarl Rupp } catch(std::exception const & ex) { 684076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 69e4a0ef16SKarl Rupp } 70e4a0ef16SKarl Rupp 71e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 72e4a0ef16SKarl Rupp 73e4a0ef16SKarl Rupp ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 74e4a0ef16SKarl Rupp } 7567c87b7fSKarl Rupp } 76e4a0ef16SKarl Rupp PetscFunctionReturn(0); 77e4a0ef16SKarl Rupp } 78e4a0ef16SKarl Rupp 79e4a0ef16SKarl Rupp #undef __FUNCT__ 80e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU" 810d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 82e4a0ef16SKarl Rupp { 83e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 84e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 85e4a0ef16SKarl Rupp PetscErrorCode ierr; 86e4a0ef16SKarl Rupp 87e4a0ef16SKarl Rupp 88e4a0ef16SKarl Rupp PetscFunctionBegin; 89e4a0ef16SKarl Rupp if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) { 90e4a0ef16SKarl Rupp try { 91e4a0ef16SKarl Rupp if (a->compressedrow.use) { 92e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 93e4a0ef16SKarl Rupp } else { 94e4a0ef16SKarl Rupp 95e4a0ef16SKarl Rupp if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 96e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 97e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 98e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 99e4a0ef16SKarl Rupp if (a->singlemalloc) { 100e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 101e4a0ef16SKarl Rupp } else { 102e4a0ef16SKarl Rupp if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 103e4a0ef16SKarl Rupp if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 104e4a0ef16SKarl Rupp if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 105e4a0ef16SKarl Rupp } 106*dcca6d9dSJed Brown ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 107f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 108e4a0ef16SKarl Rupp 109e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 110e4a0ef16SKarl Rupp 111e4a0ef16SKarl Rupp /* Setup row lengths */ 112e4a0ef16SKarl Rupp if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 113*dcca6d9dSJed Brown ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 114f7daeb2aSKarl Rupp ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 115e4a0ef16SKarl Rupp 116e4a0ef16SKarl Rupp /* Copy data back from GPU */ 117e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 118e4a0ef16SKarl Rupp 119e4a0ef16SKarl Rupp // copy row array 120e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 121e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 122e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 123e4a0ef16SKarl Rupp (a->i)[i+1] = row_buffer[i+1]; 124e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 125e4a0ef16SKarl Rupp } 126e4a0ef16SKarl Rupp 127e4a0ef16SKarl Rupp // copy column indices 128e4a0ef16SKarl Rupp viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 129e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 130e4a0ef16SKarl Rupp for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 131e4a0ef16SKarl Rupp (a->j)[i] = col_buffer[i]; 132e4a0ef16SKarl Rupp 133e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 134e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 135e4a0ef16SKarl Rupp 1364cf1874eSKarl Rupp ViennaCLWaitForGPU(); 137023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 138e4a0ef16SKarl Rupp } 1394076e183SKarl Rupp } catch(std::exception const & ex) { 1404076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 141e4a0ef16SKarl Rupp } 142e4a0ef16SKarl Rupp 143e4a0ef16SKarl Rupp /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */ 144e4a0ef16SKarl Rupp ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 145e4a0ef16SKarl Rupp ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 146e4a0ef16SKarl Rupp 147e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 148e4a0ef16SKarl Rupp } else { 149e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 150e4a0ef16SKarl Rupp } 151e4a0ef16SKarl Rupp PetscFunctionReturn(0); 152e4a0ef16SKarl Rupp } 153e4a0ef16SKarl Rupp 154e4a0ef16SKarl Rupp #undef __FUNCT__ 155e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL" 156e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left) 157e4a0ef16SKarl Rupp { 158e4a0ef16SKarl Rupp PetscErrorCode ierr; 159e4a0ef16SKarl Rupp 160e4a0ef16SKarl Rupp PetscFunctionBegin; 161e4a0ef16SKarl Rupp if (right) { 162e4a0ef16SKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr); 163e4a0ef16SKarl Rupp ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 164e4a0ef16SKarl Rupp ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 165e4a0ef16SKarl Rupp ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr); 166e4a0ef16SKarl Rupp ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 167e4a0ef16SKarl Rupp } 168e4a0ef16SKarl Rupp if (left) { 169e4a0ef16SKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr); 170e4a0ef16SKarl Rupp ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 171e4a0ef16SKarl Rupp ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 172e4a0ef16SKarl Rupp ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr); 173e4a0ef16SKarl Rupp ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 174e4a0ef16SKarl Rupp } 175e4a0ef16SKarl Rupp PetscFunctionReturn(0); 176e4a0ef16SKarl Rupp } 177e4a0ef16SKarl Rupp 178e4a0ef16SKarl Rupp #undef __FUNCT__ 179e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL" 180e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 181e4a0ef16SKarl Rupp { 182e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 183e4a0ef16SKarl Rupp PetscErrorCode ierr; 184e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 1850d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL; 1860d73d530SKarl Rupp ViennaCLVector *ygpu=NULL; 187e4a0ef16SKarl Rupp 188e4a0ef16SKarl Rupp PetscFunctionBegin; 18967c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 190e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 191e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 192e4a0ef16SKarl Rupp try { 193e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 1944cf1874eSKarl Rupp ViennaCLWaitForGPU(); 1954076e183SKarl Rupp } catch (std::exception const & ex) { 1964076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 197e4a0ef16SKarl Rupp } 198e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 199e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 200e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr); 20167c87b7fSKarl Rupp } 202e4a0ef16SKarl Rupp PetscFunctionReturn(0); 203e4a0ef16SKarl Rupp } 204e4a0ef16SKarl Rupp 205e4a0ef16SKarl Rupp 206e4a0ef16SKarl Rupp 207e4a0ef16SKarl Rupp #undef __FUNCT__ 208e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL" 209e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 210e4a0ef16SKarl Rupp { 211e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 212e4a0ef16SKarl Rupp PetscErrorCode ierr; 213e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 2140d73d530SKarl Rupp const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 2150d73d530SKarl Rupp ViennaCLVector *zgpu=NULL; 216e4a0ef16SKarl Rupp 217e4a0ef16SKarl Rupp PetscFunctionBegin; 21867c87b7fSKarl Rupp if (A->rmap->n > 0 && A->cmap->n > 0) { 219e4a0ef16SKarl Rupp try { 220e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 221e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 222e4a0ef16SKarl Rupp ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 223e4a0ef16SKarl Rupp 224e4a0ef16SKarl Rupp if (a->compressedrow.use) { 225e4a0ef16SKarl Rupp SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported"); 226e4a0ef16SKarl Rupp } else { 227e4a0ef16SKarl Rupp if (zz == xx || zz == yy) { //temporary required 228e4a0ef16SKarl Rupp ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 229e4a0ef16SKarl Rupp *zgpu = *ygpu + temp; 2304cf1874eSKarl Rupp ViennaCLWaitForGPU(); 231e4a0ef16SKarl Rupp } else { 232e4a0ef16SKarl Rupp *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 233e4a0ef16SKarl Rupp *zgpu += *ygpu; 2344cf1874eSKarl Rupp ViennaCLWaitForGPU(); 235e4a0ef16SKarl Rupp } 236e4a0ef16SKarl Rupp } 237e4a0ef16SKarl Rupp 238e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 239e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 240e4a0ef16SKarl Rupp ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 241e4a0ef16SKarl Rupp 2424076e183SKarl Rupp } catch(std::exception const & ex) { 2434076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 244e4a0ef16SKarl Rupp } 245e4a0ef16SKarl Rupp ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 24667c87b7fSKarl Rupp } 247e4a0ef16SKarl Rupp PetscFunctionReturn(0); 248e4a0ef16SKarl Rupp } 249e4a0ef16SKarl Rupp 250e4a0ef16SKarl Rupp #undef __FUNCT__ 251e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL" 252e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 253e4a0ef16SKarl Rupp { 254e4a0ef16SKarl Rupp PetscErrorCode ierr; 255e4a0ef16SKarl Rupp 256e4a0ef16SKarl Rupp PetscFunctionBegin; 257e4a0ef16SKarl Rupp ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 258e4a0ef16SKarl Rupp ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 259e4a0ef16SKarl Rupp if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 260e4a0ef16SKarl Rupp A->ops->mult = MatMult_SeqAIJViennaCL; 261e4a0ef16SKarl Rupp A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 262e4a0ef16SKarl Rupp PetscFunctionReturn(0); 263e4a0ef16SKarl Rupp } 264e4a0ef16SKarl Rupp 265e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 266e4a0ef16SKarl Rupp #undef __FUNCT__ 267e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL" 268e4a0ef16SKarl Rupp /*@ 269e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 27019fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 271e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 272e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 273e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 274e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 275e4a0ef16SKarl Rupp 276e4a0ef16SKarl Rupp 277e4a0ef16SKarl Rupp Collective on MPI_Comm 278e4a0ef16SKarl Rupp 279e4a0ef16SKarl Rupp Input Parameters: 280e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 281e4a0ef16SKarl Rupp . m - number of rows 282e4a0ef16SKarl Rupp . n - number of columns 283e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 284e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 285e4a0ef16SKarl Rupp (possibly different for each row) or NULL 286e4a0ef16SKarl Rupp 287e4a0ef16SKarl Rupp Output Parameter: 288e4a0ef16SKarl Rupp . A - the matrix 289e4a0ef16SKarl Rupp 290e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 291e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 292e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 293e4a0ef16SKarl Rupp 294e4a0ef16SKarl Rupp Notes: 295e4a0ef16SKarl Rupp If nnz is given then nz is ignored 296e4a0ef16SKarl Rupp 297e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 298e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 299e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 300e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 301e4a0ef16SKarl Rupp 302e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 303e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 304e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 305e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 306e4a0ef16SKarl Rupp 307e4a0ef16SKarl Rupp Level: intermediate 308e4a0ef16SKarl Rupp 309e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 310e4a0ef16SKarl Rupp 311e4a0ef16SKarl Rupp @*/ 312e4a0ef16SKarl Rupp PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 313e4a0ef16SKarl Rupp { 314e4a0ef16SKarl Rupp PetscErrorCode ierr; 315e4a0ef16SKarl Rupp 316e4a0ef16SKarl Rupp PetscFunctionBegin; 317e4a0ef16SKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 318e4a0ef16SKarl Rupp ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 319e4a0ef16SKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 320e4a0ef16SKarl Rupp ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 321e4a0ef16SKarl Rupp PetscFunctionReturn(0); 322e4a0ef16SKarl Rupp } 323e4a0ef16SKarl Rupp 324e4a0ef16SKarl Rupp 325e4a0ef16SKarl Rupp #undef __FUNCT__ 326e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL" 327e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 328e4a0ef16SKarl Rupp { 329e4a0ef16SKarl Rupp PetscErrorCode ierr; 330e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 331e4a0ef16SKarl Rupp 332e4a0ef16SKarl Rupp PetscFunctionBegin; 333e4a0ef16SKarl Rupp try { 334e4a0ef16SKarl Rupp if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat); 335e4a0ef16SKarl Rupp delete viennaclcontainer; 336e4a0ef16SKarl Rupp A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 3374076e183SKarl Rupp } catch(std::exception const & ex) { 3384076e183SKarl Rupp SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 339e4a0ef16SKarl Rupp } 340e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 341e4a0ef16SKarl Rupp A->spptr = 0; 342e4a0ef16SKarl Rupp ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 343e4a0ef16SKarl Rupp PetscFunctionReturn(0); 344e4a0ef16SKarl Rupp } 345e4a0ef16SKarl Rupp 346e4a0ef16SKarl Rupp 347e4a0ef16SKarl Rupp #undef __FUNCT__ 348e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL" 349e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 350e4a0ef16SKarl Rupp { 351e4a0ef16SKarl Rupp PetscErrorCode ierr; 352e4a0ef16SKarl Rupp Mat_SeqAIJ *aij; 353e4a0ef16SKarl Rupp 354e4a0ef16SKarl Rupp PetscFunctionBegin; 355e4a0ef16SKarl Rupp ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 356e4a0ef16SKarl Rupp aij = (Mat_SeqAIJ*)B->data; 357e4a0ef16SKarl Rupp aij->inode.use = PETSC_FALSE; 358e4a0ef16SKarl Rupp B->ops->mult = MatMult_SeqAIJViennaCL; 359e4a0ef16SKarl Rupp B->ops->multadd = MatMultAdd_SeqAIJViennaCL; 360e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 361e4a0ef16SKarl Rupp 362e4a0ef16SKarl Rupp ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0; 363e4a0ef16SKarl Rupp 364e4a0ef16SKarl Rupp B->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 365e4a0ef16SKarl Rupp B->ops->destroy = MatDestroy_SeqAIJViennaCL; 366e4a0ef16SKarl Rupp B->ops->getvecs = MatGetVecs_SeqAIJViennaCL; 367e4a0ef16SKarl Rupp 368e4a0ef16SKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 369e4a0ef16SKarl Rupp 370e4a0ef16SKarl Rupp B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 371e4a0ef16SKarl Rupp PetscFunctionReturn(0); 372e4a0ef16SKarl Rupp } 373e4a0ef16SKarl Rupp 374e4a0ef16SKarl Rupp 375e4a0ef16SKarl Rupp /*M 376e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 377e4a0ef16SKarl Rupp 378e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 379e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 380e4a0ef16SKarl Rupp 381e4a0ef16SKarl Rupp Options Database Keys: 382e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 383e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 384e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 385e4a0ef16SKarl Rupp 386e4a0ef16SKarl Rupp Level: beginner 387e4a0ef16SKarl Rupp 388e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 389e4a0ef16SKarl Rupp M*/ 390e4a0ef16SKarl Rupp 391