1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp /* 3e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 4e4a0ef16SKarl Rupp matrix storage format. 5e4a0ef16SKarl Rupp */ 6e4a0ef16SKarl Rupp 7aaa7dc30SBarry Smith #include <petscconf.h> 899acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp #include <algorithm> 17e4a0ef16SKarl Rupp #include <vector> 18e4a0ef16SKarl Rupp #include <string> 19e4a0ef16SKarl Rupp 20e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 21e4a0ef16SKarl Rupp 228713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2372367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 258713a8baSPatrick Sanan 26*9371c9d4SSatish Balay PetscErrorCode MatViennaCLCopyToGPU(Mat A) { 27e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29e4a0ef16SKarl Rupp 30e4a0ef16SKarl Rupp PetscFunctionBegin; 31bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 339566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34e4a0ef16SKarl Rupp 35e4a0ef16SKarl Rupp try { 36e4a0ef16SKarl Rupp if (a->compressedrow.use) { 37a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38e4a0ef16SKarl Rupp 39a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 40a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 41e4a0ef16SKarl Rupp 42*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 43*9371c9d4SSatish Balay row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 44*9371c9d4SSatish Balay for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45e4a0ef16SKarl Rupp 46*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_indices; 47*9371c9d4SSatish Balay row_indices.raw_resize(dummy, a->compressedrow.nrows); 48*9371c9d4SSatish Balay for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49a3430c56SKarl Rupp 50*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 51*9371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 52*9371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53a3430c56SKarl Rupp 54a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 559566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56e4a0ef16SKarl Rupp } else { 57a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58e4a0ef16SKarl Rupp 59e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 60e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 61e4a0ef16SKarl Rupp 62*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 63*9371c9d4SSatish Balay row_buffer.raw_resize(dummy, A->rmap->n + 1); 64*9371c9d4SSatish Balay for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65e4a0ef16SKarl Rupp 66*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 67*9371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 68*9371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69e4a0ef16SKarl Rupp 70e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 719566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72e4a0ef16SKarl Rupp } 734cf1874eSKarl Rupp ViennaCLWaitForGPU(); 74*9371c9d4SSatish Balay } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 75e4a0ef16SKarl Rupp 76a3430c56SKarl Rupp // Create temporary vector for v += A*x: 77a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 789b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 79a3430c56SKarl Rupp delete (ViennaCLVector *)viennaclstruct->tempvec; 809b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 81a3430c56SKarl Rupp } else { 82a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 83a3430c56SKarl Rupp } 84a3430c56SKarl Rupp } else { 859b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 86a3430c56SKarl Rupp } 87a3430c56SKarl Rupp 88c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 89e4a0ef16SKarl Rupp 909566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 91e4a0ef16SKarl Rupp } 9267c87b7fSKarl Rupp } 93e4a0ef16SKarl Rupp PetscFunctionReturn(0); 94e4a0ef16SKarl Rupp } 95e4a0ef16SKarl Rupp 96*9371c9d4SSatish Balay PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) { 97f38c1e66SStefano Zampini Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 98e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 99e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 100e4a0ef16SKarl Rupp 101e4a0ef16SKarl Rupp PetscFunctionBegin; 102c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0); 103c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 104e4a0ef16SKarl Rupp try { 1052c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1062c71b3e2SJacob Faibussowitsch PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 107e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 108e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 109e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 110e4a0ef16SKarl Rupp if (a->singlemalloc) { 1119566063dSJacob Faibussowitsch if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 112e4a0ef16SKarl Rupp } else { 1139566063dSJacob Faibussowitsch if (a->i) PetscCall(PetscFree(a->i)); 1149566063dSJacob Faibussowitsch if (a->j) PetscCall(PetscFree(a->j)); 1159566063dSJacob Faibussowitsch if (a->a) PetscCall(PetscFree(a->a)); 116e4a0ef16SKarl Rupp } 1179566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 1189566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + (m + 1) * sizeof(PetscInt))); 119e4a0ef16SKarl Rupp 120e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 121e4a0ef16SKarl Rupp 122e4a0ef16SKarl Rupp /* Setup row lengths */ 1239566063dSJacob Faibussowitsch PetscCall(PetscFree(a->imax)); 1249566063dSJacob Faibussowitsch PetscCall(PetscFree(a->ilen)); 1259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->imax)); 1269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->ilen)); 1279566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * m * sizeof(PetscInt))); 128e4a0ef16SKarl Rupp 129e4a0ef16SKarl Rupp /* Copy data back from GPU */ 130*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 131*9371c9d4SSatish Balay row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 132e4a0ef16SKarl Rupp 133e4a0ef16SKarl Rupp // copy row array 134e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 135e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 136e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 137e4a0ef16SKarl Rupp (a->i)[i + 1] = row_buffer[i + 1]; 138e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 139e4a0ef16SKarl Rupp } 140e4a0ef16SKarl Rupp 141e4a0ef16SKarl Rupp // copy column indices 142*9371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 143*9371c9d4SSatish Balay col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 144e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 145*9371c9d4SSatish Balay for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 146e4a0ef16SKarl Rupp 147e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 148e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 149e4a0ef16SKarl Rupp 1509566063dSJacob Faibussowitsch PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 1514cf1874eSKarl Rupp ViennaCLWaitForGPU(); 152023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 153*9371c9d4SSatish Balay } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 154c70f7ee4SJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 155f38c1e66SStefano Zampini PetscFunctionReturn(0); 156f38c1e66SStefano Zampini } else { 157c70f7ee4SJunchao Zhang if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0); 158e4a0ef16SKarl Rupp 1592c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 160f38c1e66SStefano Zampini if (!Agpu) { 161f38c1e66SStefano Zampini viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 162f38c1e66SStefano Zampini } else { 163f38c1e66SStefano Zampini viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 164f38c1e66SStefano Zampini } 165f38c1e66SStefano Zampini } 166c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 167b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 1689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 170e4a0ef16SKarl Rupp PetscFunctionReturn(0); 171e4a0ef16SKarl Rupp } 172e4a0ef16SKarl Rupp 173*9371c9d4SSatish Balay PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) { 174e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 175e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 1760d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL; 1770d73d530SKarl Rupp ViennaCLVector *ygpu = NULL; 178e4a0ef16SKarl Rupp 179e4a0ef16SKarl Rupp PetscFunctionBegin; 180837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 1819566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 182bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 1839566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 1849566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 1859566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 186e4a0ef16SKarl Rupp try { 187b7832b47SStefano Zampini if (a->compressedrow.use) { 188b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 189b7832b47SStefano Zampini } else { 190e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 191b7832b47SStefano Zampini } 1924cf1874eSKarl Rupp ViennaCLWaitForGPU(); 193*9371c9d4SSatish Balay } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 1949566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 1959566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 1969566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 1979566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 198bf1781e8SStefano Zampini } else { 1999566063dSJacob Faibussowitsch PetscCall(VecSet_SeqViennaCL(yy, 0)); 20067c87b7fSKarl Rupp } 201e4a0ef16SKarl Rupp PetscFunctionReturn(0); 202e4a0ef16SKarl Rupp } 203e4a0ef16SKarl Rupp 204*9371c9d4SSatish Balay PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) { 205e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 206e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 2070d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 2080d73d530SKarl Rupp ViennaCLVector *zgpu = NULL; 209e4a0ef16SKarl Rupp 210e4a0ef16SKarl Rupp PetscFunctionBegin; 211837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 2129566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 213bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 214e4a0ef16SKarl Rupp try { 2159566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 2169566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 2179566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 2189566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 219f38c1e66SStefano Zampini if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 220f38c1e66SStefano Zampini else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 221f38c1e66SStefano Zampini if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 222f38c1e66SStefano Zampini else *zgpu += *viennaclstruct->tempvec; 2234cf1874eSKarl Rupp ViennaCLWaitForGPU(); 2249566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 2259566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 2269566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 2279566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 228e4a0ef16SKarl Rupp 229*9371c9d4SSatish Balay } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 2309566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 231bf1781e8SStefano Zampini } else { 2329566063dSJacob Faibussowitsch PetscCall(VecCopy_SeqViennaCL(yy, zz)); 23367c87b7fSKarl Rupp } 234e4a0ef16SKarl Rupp PetscFunctionReturn(0); 235e4a0ef16SKarl Rupp } 236e4a0ef16SKarl Rupp 237*9371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) { 238e4a0ef16SKarl Rupp PetscFunctionBegin; 2399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 240489de41dSStefano Zampini if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2419566063dSJacob Faibussowitsch if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 242e4a0ef16SKarl Rupp PetscFunctionReturn(0); 243e4a0ef16SKarl Rupp } 244e4a0ef16SKarl Rupp 245e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 246cab5ea25SPierre Jolivet /*@C 247e4a0ef16SKarl Rupp MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 24819fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 249e4a0ef16SKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 250e4a0ef16SKarl Rupp assembly performance the user should preallocate the matrix storage by setting 251e4a0ef16SKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 252e4a0ef16SKarl Rupp performance during matrix assembly can be increased substantially. 253e4a0ef16SKarl Rupp 254d083f849SBarry Smith Collective 255e4a0ef16SKarl Rupp 256e4a0ef16SKarl Rupp Input Parameters: 257e4a0ef16SKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 258e4a0ef16SKarl Rupp . m - number of rows 259e4a0ef16SKarl Rupp . n - number of columns 260e4a0ef16SKarl Rupp . nz - number of nonzeros per row (same for all rows) 261e4a0ef16SKarl Rupp - nnz - array containing the number of nonzeros in the various rows 262e4a0ef16SKarl Rupp (possibly different for each row) or NULL 263e4a0ef16SKarl Rupp 264e4a0ef16SKarl Rupp Output Parameter: 265e4a0ef16SKarl Rupp . A - the matrix 266e4a0ef16SKarl Rupp 267e4a0ef16SKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 268e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 269e4a0ef16SKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 270e4a0ef16SKarl Rupp 271e4a0ef16SKarl Rupp Notes: 272e4a0ef16SKarl Rupp If nnz is given then nz is ignored 273e4a0ef16SKarl Rupp 274e4a0ef16SKarl Rupp The AIJ format (also called the Yale sparse matrix format or 275e4a0ef16SKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 276e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 277e4a0ef16SKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 278e4a0ef16SKarl Rupp 279e4a0ef16SKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 280e4a0ef16SKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 281e4a0ef16SKarl Rupp allocation. For large problems you MUST preallocate memory or you 282e4a0ef16SKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 283e4a0ef16SKarl Rupp 284e4a0ef16SKarl Rupp Level: intermediate 285e4a0ef16SKarl Rupp 286db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 287e4a0ef16SKarl Rupp 288e4a0ef16SKarl Rupp @*/ 289*9371c9d4SSatish Balay PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) { 290e4a0ef16SKarl Rupp PetscFunctionBegin; 2919566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 2929566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 2939566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 2949566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 295e4a0ef16SKarl Rupp PetscFunctionReturn(0); 296e4a0ef16SKarl Rupp } 297e4a0ef16SKarl Rupp 298*9371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) { 299e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 300e4a0ef16SKarl Rupp 301e4a0ef16SKarl Rupp PetscFunctionBegin; 302e4a0ef16SKarl Rupp try { 3036447cd05SKarl Rupp if (viennaclcontainer) { 3046447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3056447cd05SKarl Rupp delete viennaclcontainer->mat; 3066447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 307e4a0ef16SKarl Rupp delete viennaclcontainer; 3086447cd05SKarl Rupp } 309c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 310*9371c9d4SSatish Balay } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); } 3118713a8baSPatrick Sanan 3129566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 3139566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 3149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 3158713a8baSPatrick Sanan 316e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 317e4a0ef16SKarl Rupp A->spptr = 0; 3189566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 319e4a0ef16SKarl Rupp PetscFunctionReturn(0); 320e4a0ef16SKarl Rupp } 321e4a0ef16SKarl Rupp 322*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) { 323e4a0ef16SKarl Rupp PetscFunctionBegin; 3249566063dSJacob Faibussowitsch PetscCall(MatCreate_SeqAIJ(B)); 3259566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 3268713a8baSPatrick Sanan PetscFunctionReturn(0); 3278713a8baSPatrick Sanan } 3288713a8baSPatrick Sanan 329b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 330*9371c9d4SSatish Balay static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) { 331c3cca76eSKarl Rupp Mat C; 332c3cca76eSKarl Rupp 333c3cca76eSKarl Rupp PetscFunctionBegin; 3349566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 335c3cca76eSKarl Rupp C = *B; 336c3cca76eSKarl Rupp 3379566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 33892f9df4aSRichard Tran Mills C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 339c3cca76eSKarl Rupp 340c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 341c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 342c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 343c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 344c3cca76eSKarl Rupp 3459566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 346c3cca76eSKarl Rupp 347c70f7ee4SJunchao Zhang C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 348c3cca76eSKarl Rupp 349c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 3509566063dSJacob Faibussowitsch if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 351c3cca76eSKarl Rupp PetscFunctionReturn(0); 352c3cca76eSKarl Rupp } 353c3cca76eSKarl Rupp 354*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 355f38c1e66SStefano Zampini PetscFunctionBegin; 3569566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 357c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 358f38c1e66SStefano Zampini PetscFunctionReturn(0); 359f38c1e66SStefano Zampini } 360f38c1e66SStefano Zampini 361*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 362f38c1e66SStefano Zampini PetscFunctionBegin; 363c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 364c1a4f3baSJunchao Zhang *array = NULL; 365c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 366c1a4f3baSJunchao Zhang } 367c1a4f3baSJunchao Zhang 368*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 369c1a4f3baSJunchao Zhang PetscFunctionBegin; 3709566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 371c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 372c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 373c1a4f3baSJunchao Zhang } 374c1a4f3baSJunchao Zhang 375*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) { 376c1a4f3baSJunchao Zhang PetscFunctionBegin; 377c1a4f3baSJunchao Zhang *array = NULL; 378c1a4f3baSJunchao Zhang /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 379c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 380c1a4f3baSJunchao Zhang } 381c1a4f3baSJunchao Zhang 382*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 383c1a4f3baSJunchao Zhang PetscFunctionBegin; 384c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 385c1a4f3baSJunchao Zhang PetscFunctionReturn(0); 386c1a4f3baSJunchao Zhang } 387c1a4f3baSJunchao Zhang 388*9371c9d4SSatish Balay static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) { 389c1a4f3baSJunchao Zhang PetscFunctionBegin; 390c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 391f38c1e66SStefano Zampini *array = NULL; 392f38c1e66SStefano Zampini PetscFunctionReturn(0); 393f38c1e66SStefano Zampini } 394f38c1e66SStefano Zampini 395*9371c9d4SSatish Balay static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) { 396c1a4f3baSJunchao Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 397f38c1e66SStefano Zampini 398e7e92044SBarry Smith PetscFunctionBegin; 399b470e4b4SRichard Tran Mills A->boundtocpu = flg; 400c1a4f3baSJunchao Zhang if (flg && a->inode.size) { 401c1a4f3baSJunchao Zhang a->inode.use = PETSC_TRUE; 402ea500dcfSRichard Tran Mills } else { 403c1a4f3baSJunchao Zhang a->inode.use = PETSC_FALSE; 404ea500dcfSRichard Tran Mills } 405e7e92044SBarry Smith if (flg) { 406f38c1e66SStefano Zampini /* make sure we have an up-to-date copy on the CPU */ 4079566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 408e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 409e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 410e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 411e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 4129566063dSJacob Faibussowitsch PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 413e7e92044SBarry Smith } else { 414e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 415e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 416e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 417e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 418e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 419c1a4f3baSJunchao Zhang 420c1a4f3baSJunchao Zhang a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 421c1a4f3baSJunchao Zhang a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 422c1a4f3baSJunchao Zhang a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 423c1a4f3baSJunchao Zhang a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 424c1a4f3baSJunchao Zhang a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 425c1a4f3baSJunchao Zhang a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 426e7e92044SBarry Smith } 427e7e92044SBarry Smith PetscFunctionReturn(0); 428e7e92044SBarry Smith } 429e7e92044SBarry Smith 430*9371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 4318713a8baSPatrick Sanan Mat B; 4328713a8baSPatrick Sanan 4338713a8baSPatrick Sanan PetscFunctionBegin; 4345f80ce2aSJacob Faibussowitsch PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4358713a8baSPatrick Sanan 4369566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 4378713a8baSPatrick Sanan 4388713a8baSPatrick Sanan B = *newmat; 4398713a8baSPatrick Sanan 440e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 441e4a0ef16SKarl Rupp 442a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 443a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 444a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 445e4a0ef16SKarl Rupp 4469566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 44792f9df4aSRichard Tran Mills A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 448e4a0ef16SKarl Rupp 4499566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 4509566063dSJacob Faibussowitsch PetscCall(PetscFree(B->defaultvectype)); 4519566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 452e4a0ef16SKarl Rupp 4539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 4549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 4559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 4568713a8baSPatrick Sanan 457c70f7ee4SJunchao Zhang B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 4588713a8baSPatrick Sanan 4598713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4609566063dSJacob Faibussowitsch if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 461e4a0ef16SKarl Rupp PetscFunctionReturn(0); 462e4a0ef16SKarl Rupp } 463e4a0ef16SKarl Rupp 4643ca39a21SBarry Smith /*MC 465e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 466e4a0ef16SKarl Rupp 467e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 468e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 469e4a0ef16SKarl Rupp 470e4a0ef16SKarl Rupp Options Database Keys: 471e4a0ef16SKarl Rupp + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 472e4a0ef16SKarl Rupp . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 473e4a0ef16SKarl Rupp - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 474e4a0ef16SKarl Rupp 475e4a0ef16SKarl Rupp Level: beginner 476e4a0ef16SKarl Rupp 477db781477SPatrick Sanan .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 478e4a0ef16SKarl Rupp M*/ 479e4a0ef16SKarl Rupp 480*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) { 48172367587SKarl Rupp PetscFunctionBegin; 4829566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 4839566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 4849566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 4859566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 48672367587SKarl Rupp PetscFunctionReturn(0); 48772367587SKarl Rupp } 488