1e4a0ef16SKarl Rupp /* 2e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 3e4a0ef16SKarl Rupp matrix storage format. 4e4a0ef16SKarl Rupp */ 5e4a0ef16SKarl Rupp 6aaa7dc30SBarry Smith #include <petscconf.h> 799acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 8aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 9aaa7dc30SBarry Smith #include <petscbt.h> 10aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 11af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 12e4a0ef16SKarl Rupp 13aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 14e4a0ef16SKarl Rupp 15e4a0ef16SKarl Rupp #include <algorithm> 16e4a0ef16SKarl Rupp #include <vector> 17e4a0ef16SKarl Rupp #include <string> 18e4a0ef16SKarl Rupp 19e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 20e4a0ef16SKarl Rupp 218713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2272367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 234222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 248713a8baSPatrick Sanan 25d71ae5a4SJacob Faibussowitsch PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26d71ae5a4SJacob Faibussowitsch { 27e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 28e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 29e4a0ef16SKarl Rupp 30e4a0ef16SKarl Rupp PetscFunctionBegin; 31bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 32c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 339566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 34e4a0ef16SKarl Rupp 35e4a0ef16SKarl Rupp try { 36e4a0ef16SKarl Rupp if (a->compressedrow.use) { 37a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 38e4a0ef16SKarl Rupp 39a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 40a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 41e4a0ef16SKarl Rupp 429371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 439371c9d4SSatish Balay row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 449371c9d4SSatish Balay for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 45e4a0ef16SKarl Rupp 469371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_indices; 479371c9d4SSatish Balay row_indices.raw_resize(dummy, a->compressedrow.nrows); 489371c9d4SSatish Balay for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 49a3430c56SKarl Rupp 509371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 519371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 529371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 53a3430c56SKarl Rupp 54a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 559566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 56e4a0ef16SKarl Rupp } else { 57a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 58e4a0ef16SKarl Rupp 59e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 60e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 61e4a0ef16SKarl Rupp 629371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 639371c9d4SSatish Balay row_buffer.raw_resize(dummy, A->rmap->n + 1); 649371c9d4SSatish Balay for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 65e4a0ef16SKarl Rupp 669371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 679371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 689371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 69e4a0ef16SKarl Rupp 70e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 719566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 72e4a0ef16SKarl Rupp } 734cf1874eSKarl Rupp ViennaCLWaitForGPU(); 74d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 75d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 76d71ae5a4SJacob Faibussowitsch } 77e4a0ef16SKarl Rupp 78a3430c56SKarl Rupp // Create temporary vector for v += A*x: 79a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 809b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 81a3430c56SKarl Rupp delete (ViennaCLVector *)viennaclstruct->tempvec; 829b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 83a3430c56SKarl Rupp } else { 84a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 85a3430c56SKarl Rupp } 86a3430c56SKarl Rupp } else { 879b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 88a3430c56SKarl Rupp } 89a3430c56SKarl Rupp 90c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 91e4a0ef16SKarl Rupp 929566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 93e4a0ef16SKarl Rupp } 9467c87b7fSKarl Rupp } 953ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 96e4a0ef16SKarl Rupp } 97e4a0ef16SKarl Rupp 98d71ae5a4SJacob Faibussowitsch PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 99d71ae5a4SJacob Faibussowitsch { 100f38c1e66SStefano Zampini Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 101e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 102e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 103e4a0ef16SKarl Rupp 104e4a0ef16SKarl Rupp PetscFunctionBegin; 1053ba16761SJacob Faibussowitsch if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS); 106c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 107e4a0ef16SKarl Rupp try { 1082c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1092c71b3e2SJacob Faibussowitsch PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 110e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 111e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 112e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 1139f0612e4SBarry Smith if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)a->a)); 1149f0612e4SBarry Smith if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->j)); 1159f0612e4SBarry Smith if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->i)); 1169f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a)); 1179f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j)); 1189f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i)); 1199f0612e4SBarry Smith a->free_a = PETSC_TRUE; 1209f0612e4SBarry Smith a->free_ij = PETSC_TRUE; 121e4a0ef16SKarl Rupp 122e4a0ef16SKarl Rupp /* Setup row lengths */ 1239566063dSJacob Faibussowitsch PetscCall(PetscFree(a->imax)); 1249566063dSJacob Faibussowitsch PetscCall(PetscFree(a->ilen)); 1259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->imax)); 1269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->ilen)); 127e4a0ef16SKarl Rupp 128e4a0ef16SKarl Rupp /* Copy data back from GPU */ 1299371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 1309371c9d4SSatish Balay row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 131e4a0ef16SKarl Rupp 132e4a0ef16SKarl Rupp // copy row array 133e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 134e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 135e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 136e4a0ef16SKarl Rupp (a->i)[i + 1] = row_buffer[i + 1]; 137e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 138e4a0ef16SKarl Rupp } 139e4a0ef16SKarl Rupp 140e4a0ef16SKarl Rupp // copy column indices 1419371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 1429371c9d4SSatish Balay col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 143e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 1449371c9d4SSatish Balay for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 145e4a0ef16SKarl Rupp 146e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 147e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 148e4a0ef16SKarl Rupp 1499566063dSJacob Faibussowitsch PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 1504cf1874eSKarl Rupp ViennaCLWaitForGPU(); 151d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 152d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 153d71ae5a4SJacob Faibussowitsch } 154c70f7ee4SJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 1553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 156f38c1e66SStefano Zampini } else { 1573ba16761SJacob Faibussowitsch if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS); 158e4a0ef16SKarl Rupp 1592c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 160f38c1e66SStefano Zampini if (!Agpu) { 161f38c1e66SStefano Zampini viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 162f38c1e66SStefano Zampini } else { 163f38c1e66SStefano Zampini viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 164f38c1e66SStefano Zampini } 165f38c1e66SStefano Zampini } 166c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 167b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 1689566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1699566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 171e4a0ef16SKarl Rupp } 172e4a0ef16SKarl Rupp 17366976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) 174d71ae5a4SJacob Faibussowitsch { 175e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 176e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 1770d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL; 1780d73d530SKarl Rupp ViennaCLVector *ygpu = NULL; 179e4a0ef16SKarl Rupp 180e4a0ef16SKarl Rupp PetscFunctionBegin; 181837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 1829566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 183bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 1849566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 1859566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 1869566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 187e4a0ef16SKarl Rupp try { 188b7832b47SStefano Zampini if (a->compressedrow.use) { 189b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 190b7832b47SStefano Zampini } else { 191e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 192b7832b47SStefano Zampini } 1934cf1874eSKarl Rupp ViennaCLWaitForGPU(); 194d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 195d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 196d71ae5a4SJacob Faibussowitsch } 1979566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 1989566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 1999566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 2009566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 201bf1781e8SStefano Zampini } else { 2029566063dSJacob Faibussowitsch PetscCall(VecSet_SeqViennaCL(yy, 0)); 20367c87b7fSKarl Rupp } 2043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 205e4a0ef16SKarl Rupp } 206e4a0ef16SKarl Rupp 20766976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) 208d71ae5a4SJacob Faibussowitsch { 209e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 210e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 2110d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 2120d73d530SKarl Rupp ViennaCLVector *zgpu = NULL; 213e4a0ef16SKarl Rupp 214e4a0ef16SKarl Rupp PetscFunctionBegin; 215837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 2169566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 217bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 218e4a0ef16SKarl Rupp try { 2199566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 2209566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 2219566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 2229566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 223f38c1e66SStefano Zampini if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 224f38c1e66SStefano Zampini else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 225f38c1e66SStefano Zampini if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 226f38c1e66SStefano Zampini else *zgpu += *viennaclstruct->tempvec; 2274cf1874eSKarl Rupp ViennaCLWaitForGPU(); 2289566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 2299566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 2309566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 2319566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 232e4a0ef16SKarl Rupp 233d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 234d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 235d71ae5a4SJacob Faibussowitsch } 2369566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 237bf1781e8SStefano Zampini } else { 2389566063dSJacob Faibussowitsch PetscCall(VecCopy_SeqViennaCL(yy, zz)); 23967c87b7fSKarl Rupp } 2403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 241e4a0ef16SKarl Rupp } 242e4a0ef16SKarl Rupp 24366976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) 244d71ae5a4SJacob Faibussowitsch { 245e4a0ef16SKarl Rupp PetscFunctionBegin; 2469566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 2473ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2489566063dSJacob Faibussowitsch if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 2493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 250e4a0ef16SKarl Rupp } 251e4a0ef16SKarl Rupp 252cab5ea25SPierre Jolivet /*@C 25311a5261eSBarry Smith MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 25419fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 25520f4b53cSBarry Smith to GPUs and use the ViennaCL library for calculations. 256e4a0ef16SKarl Rupp 257d083f849SBarry Smith Collective 258e4a0ef16SKarl Rupp 259e4a0ef16SKarl Rupp Input Parameters: 26011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 261e4a0ef16SKarl Rupp . m - number of rows 262e4a0ef16SKarl Rupp . n - number of columns 26320f4b53cSBarry Smith . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is set 26420f4b53cSBarry Smith - nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL` 265e4a0ef16SKarl Rupp 266e4a0ef16SKarl Rupp Output Parameter: 267e4a0ef16SKarl Rupp . A - the matrix 268e4a0ef16SKarl Rupp 26911a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 270e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 27111a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 272e4a0ef16SKarl Rupp 273e4a0ef16SKarl Rupp Notes: 27411a5261eSBarry Smith The AIJ format, also called 2752ef1f0ffSBarry Smith compressed row storage, is fully compatible with standard Fortran 276e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 27720f4b53cSBarry Smith either one (as in Fortran) or zero. 278e4a0ef16SKarl Rupp 27920f4b53cSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 28020f4b53cSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 28120f4b53cSBarry Smith allocation. 282e4a0ef16SKarl Rupp 283e4a0ef16SKarl Rupp Level: intermediate 284e4a0ef16SKarl Rupp 285fe59aa6dSJacob Faibussowitsch .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()` 286e4a0ef16SKarl Rupp @*/ 287d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 288d71ae5a4SJacob Faibussowitsch { 289e4a0ef16SKarl Rupp PetscFunctionBegin; 2909566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 2919566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 2929566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 2939566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 2943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 295e4a0ef16SKarl Rupp } 296e4a0ef16SKarl Rupp 29766976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 298d71ae5a4SJacob Faibussowitsch { 299e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 300e4a0ef16SKarl Rupp 301e4a0ef16SKarl Rupp PetscFunctionBegin; 302e4a0ef16SKarl Rupp try { 3036447cd05SKarl Rupp if (viennaclcontainer) { 3046447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3056447cd05SKarl Rupp delete viennaclcontainer->mat; 3066447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 307e4a0ef16SKarl Rupp delete viennaclcontainer; 3086447cd05SKarl Rupp } 309c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 310d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 311d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 312d71ae5a4SJacob Faibussowitsch } 3138713a8baSPatrick Sanan 3149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 3159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 3169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 3178713a8baSPatrick Sanan 318e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 319e4a0ef16SKarl Rupp A->spptr = 0; 3209566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 3213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 322e4a0ef16SKarl Rupp } 323e4a0ef16SKarl Rupp 324d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 325d71ae5a4SJacob Faibussowitsch { 326e4a0ef16SKarl Rupp PetscFunctionBegin; 3279566063dSJacob Faibussowitsch PetscCall(MatCreate_SeqAIJ(B)); 3289566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 3293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3308713a8baSPatrick Sanan } 3318713a8baSPatrick Sanan 332b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 333d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) 334d71ae5a4SJacob Faibussowitsch { 335c3cca76eSKarl Rupp Mat C; 336c3cca76eSKarl Rupp 337c3cca76eSKarl Rupp PetscFunctionBegin; 3389566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 339c3cca76eSKarl Rupp C = *B; 340c3cca76eSKarl Rupp 3419566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 34292f9df4aSRichard Tran Mills C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 343c3cca76eSKarl Rupp 344c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 345c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 346c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 347c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 348c3cca76eSKarl Rupp 3499566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 350c3cca76eSKarl Rupp 351c70f7ee4SJunchao Zhang C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 352c3cca76eSKarl Rupp 353c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 3549566063dSJacob Faibussowitsch if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 3553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 356c3cca76eSKarl Rupp } 357c3cca76eSKarl Rupp 358d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 359d71ae5a4SJacob Faibussowitsch { 360f38c1e66SStefano Zampini PetscFunctionBegin; 3619566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 362c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3633ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 364f38c1e66SStefano Zampini } 365f38c1e66SStefano Zampini 366d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 367d71ae5a4SJacob Faibussowitsch { 368f38c1e66SStefano Zampini PetscFunctionBegin; 369c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 370c1a4f3baSJunchao Zhang *array = NULL; 3713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 372c1a4f3baSJunchao Zhang } 373c1a4f3baSJunchao Zhang 374d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 375d71ae5a4SJacob Faibussowitsch { 376c1a4f3baSJunchao Zhang PetscFunctionBegin; 3779566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 378c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 380c1a4f3baSJunchao Zhang } 381c1a4f3baSJunchao Zhang 382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 383d71ae5a4SJacob Faibussowitsch { 384c1a4f3baSJunchao Zhang PetscFunctionBegin; 385c1a4f3baSJunchao Zhang *array = NULL; 386c1a4f3baSJunchao Zhang /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 3873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 388c1a4f3baSJunchao Zhang } 389c1a4f3baSJunchao Zhang 390d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 391d71ae5a4SJacob Faibussowitsch { 392c1a4f3baSJunchao Zhang PetscFunctionBegin; 393c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 395c1a4f3baSJunchao Zhang } 396c1a4f3baSJunchao Zhang 397d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 398d71ae5a4SJacob Faibussowitsch { 399c1a4f3baSJunchao Zhang PetscFunctionBegin; 400c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 401f38c1e66SStefano Zampini *array = NULL; 4023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 403f38c1e66SStefano Zampini } 404f38c1e66SStefano Zampini 405d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) 406d71ae5a4SJacob Faibussowitsch { 407c1a4f3baSJunchao Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 408f38c1e66SStefano Zampini 409e7e92044SBarry Smith PetscFunctionBegin; 410b470e4b4SRichard Tran Mills A->boundtocpu = flg; 411*4d12350bSJunchao Zhang if (flg && a->inode.size_csr) { 412c1a4f3baSJunchao Zhang a->inode.use = PETSC_TRUE; 413ea500dcfSRichard Tran Mills } else { 414c1a4f3baSJunchao Zhang a->inode.use = PETSC_FALSE; 415ea500dcfSRichard Tran Mills } 416e7e92044SBarry Smith if (flg) { 417f38c1e66SStefano Zampini /* make sure we have an up-to-date copy on the CPU */ 4189566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 419e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 420e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 421e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 422e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 4239566063dSJacob Faibussowitsch PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 424e7e92044SBarry Smith } else { 425e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 426e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 427e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 428e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 429e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 430c1a4f3baSJunchao Zhang 431c1a4f3baSJunchao Zhang a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 432c1a4f3baSJunchao Zhang a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 433c1a4f3baSJunchao Zhang a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 434c1a4f3baSJunchao Zhang a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 435c1a4f3baSJunchao Zhang a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 436c1a4f3baSJunchao Zhang a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 437e7e92044SBarry Smith } 4383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 439e7e92044SBarry Smith } 440e7e92044SBarry Smith 441d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 442d71ae5a4SJacob Faibussowitsch { 4438713a8baSPatrick Sanan Mat B; 4448713a8baSPatrick Sanan 4458713a8baSPatrick Sanan PetscFunctionBegin; 4465f80ce2aSJacob Faibussowitsch PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4478713a8baSPatrick Sanan 4489566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 4498713a8baSPatrick Sanan 4508713a8baSPatrick Sanan B = *newmat; 4518713a8baSPatrick Sanan 452e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 453e4a0ef16SKarl Rupp 454a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 455a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 456a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 457e4a0ef16SKarl Rupp 4589566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 45992f9df4aSRichard Tran Mills A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 460e4a0ef16SKarl Rupp 4619566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 4629566063dSJacob Faibussowitsch PetscCall(PetscFree(B->defaultvectype)); 4639566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 464e4a0ef16SKarl Rupp 4659566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 4669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 4679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 4688713a8baSPatrick Sanan 469c70f7ee4SJunchao Zhang B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 4708713a8baSPatrick Sanan 4718713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4729566063dSJacob Faibussowitsch if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 4733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 474e4a0ef16SKarl Rupp } 475e4a0ef16SKarl Rupp 4763ca39a21SBarry Smith /*MC 477e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 478e4a0ef16SKarl Rupp 47915229ffcSPierre Jolivet A matrix type whose data resides on GPUs. These matrices are in CSR format by 480e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 481e4a0ef16SKarl Rupp 482e4a0ef16SKarl Rupp Options Database Keys: 48311a5261eSBarry Smith + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 48411a5261eSBarry Smith . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 48511a5261eSBarry Smith - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 486e4a0ef16SKarl Rupp 487e4a0ef16SKarl Rupp Level: beginner 488e4a0ef16SKarl Rupp 489db781477SPatrick Sanan .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 490e4a0ef16SKarl Rupp M*/ 491e4a0ef16SKarl Rupp 492d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 493d71ae5a4SJacob Faibussowitsch { 49472367587SKarl Rupp PetscFunctionBegin; 4959566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 4969566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 4979566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 4989566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50072367587SKarl Rupp } 501