1e4a0ef16SKarl Rupp 2e4a0ef16SKarl Rupp /* 3e4a0ef16SKarl Rupp Defines the basic matrix operations for the AIJ (compressed row) 4e4a0ef16SKarl Rupp matrix storage format. 5e4a0ef16SKarl Rupp */ 6e4a0ef16SKarl Rupp 7aaa7dc30SBarry Smith #include <petscconf.h> 899acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10aaa7dc30SBarry Smith #include <petscbt.h> 11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h> 12af0996ceSBarry Smith #include <petsc/private/vecimpl.h> 13e4a0ef16SKarl Rupp 14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15e4a0ef16SKarl Rupp 16e4a0ef16SKarl Rupp #include <algorithm> 17e4a0ef16SKarl Rupp #include <vector> 18e4a0ef16SKarl Rupp #include <string> 19e4a0ef16SKarl Rupp 20e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp" 21e4a0ef16SKarl Rupp 228713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat); 2372367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *); 244222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat); 258713a8baSPatrick Sanan 26d71ae5a4SJacob Faibussowitsch PetscErrorCode MatViennaCLCopyToGPU(Mat A) 27d71ae5a4SJacob Faibussowitsch { 28e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 29e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 30e4a0ef16SKarl Rupp 31e4a0ef16SKarl Rupp PetscFunctionBegin; 32bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0 33c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) { 349566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 35e4a0ef16SKarl Rupp 36e4a0ef16SKarl Rupp try { 37e4a0ef16SKarl Rupp if (a->compressedrow.use) { 38a3430c56SKarl Rupp if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix(); 39e4a0ef16SKarl Rupp 40a3430c56SKarl Rupp // Since PetscInt is different from cl_uint, we have to convert: 41a3430c56SKarl Rupp viennacl::backend::mem_handle dummy; 42e4a0ef16SKarl Rupp 439371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 449371c9d4SSatish Balay row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1); 459371c9d4SSatish Balay for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]); 46e4a0ef16SKarl Rupp 479371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_indices; 489371c9d4SSatish Balay row_indices.raw_resize(dummy, a->compressedrow.nrows); 499371c9d4SSatish Balay for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]); 50a3430c56SKarl Rupp 519371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 529371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 539371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 54a3430c56SKarl Rupp 55a3430c56SKarl Rupp viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz); 569566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 57e4a0ef16SKarl Rupp } else { 58a3430c56SKarl Rupp if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix(); 59e4a0ef16SKarl Rupp 60e4a0ef16SKarl Rupp // Since PetscInt is in general different from cl_uint, we have to convert: 61e4a0ef16SKarl Rupp viennacl::backend::mem_handle dummy; 62e4a0ef16SKarl Rupp 639371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 649371c9d4SSatish Balay row_buffer.raw_resize(dummy, A->rmap->n + 1); 659371c9d4SSatish Balay for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]); 66e4a0ef16SKarl Rupp 679371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 689371c9d4SSatish Balay col_buffer.raw_resize(dummy, a->nz); 699371c9d4SSatish Balay for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]); 70e4a0ef16SKarl Rupp 71e4a0ef16SKarl Rupp viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 729566063dSJacob Faibussowitsch PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar))); 73e4a0ef16SKarl Rupp } 744cf1874eSKarl Rupp ViennaCLWaitForGPU(); 75d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 76d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 77d71ae5a4SJacob Faibussowitsch } 78e4a0ef16SKarl Rupp 79a3430c56SKarl Rupp // Create temporary vector for v += A*x: 80a3430c56SKarl Rupp if (viennaclstruct->tempvec) { 819b66742cSDave May if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) { 82a3430c56SKarl Rupp delete (ViennaCLVector *)viennaclstruct->tempvec; 839b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 84a3430c56SKarl Rupp } else { 85a3430c56SKarl Rupp viennaclstruct->tempvec->clear(); 86a3430c56SKarl Rupp } 87a3430c56SKarl Rupp } else { 889b66742cSDave May viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n); 89a3430c56SKarl Rupp } 90a3430c56SKarl Rupp 91c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 92e4a0ef16SKarl Rupp 939566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0)); 94e4a0ef16SKarl Rupp } 9567c87b7fSKarl Rupp } 963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 97e4a0ef16SKarl Rupp } 98e4a0ef16SKarl Rupp 99d71ae5a4SJacob Faibussowitsch PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 100d71ae5a4SJacob Faibussowitsch { 101f38c1e66SStefano Zampini Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 102e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 103e4a0ef16SKarl Rupp PetscInt m = A->rmap->n; 104e4a0ef16SKarl Rupp 105e4a0ef16SKarl Rupp PetscFunctionBegin; 1063ba16761SJacob Faibussowitsch if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS); 107c70f7ee4SJunchao Zhang if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) { 108e4a0ef16SKarl Rupp try { 1092c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 1102c71b3e2SJacob Faibussowitsch PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m); 111e4a0ef16SKarl Rupp a->nz = Agpu->nnz(); 112e4a0ef16SKarl Rupp a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 113e4a0ef16SKarl Rupp A->preallocated = PETSC_TRUE; 114e4a0ef16SKarl Rupp if (a->singlemalloc) { 1159566063dSJacob Faibussowitsch if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i)); 116e4a0ef16SKarl Rupp } else { 1179566063dSJacob Faibussowitsch if (a->i) PetscCall(PetscFree(a->i)); 1189566063dSJacob Faibussowitsch if (a->j) PetscCall(PetscFree(a->j)); 1199566063dSJacob Faibussowitsch if (a->a) PetscCall(PetscFree(a->a)); 120e4a0ef16SKarl Rupp } 1219566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i)); 122e4a0ef16SKarl Rupp 123e4a0ef16SKarl Rupp a->singlemalloc = PETSC_TRUE; 124e4a0ef16SKarl Rupp 125e4a0ef16SKarl Rupp /* Setup row lengths */ 1269566063dSJacob Faibussowitsch PetscCall(PetscFree(a->imax)); 1279566063dSJacob Faibussowitsch PetscCall(PetscFree(a->ilen)); 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->imax)); 1299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &a->ilen)); 130e4a0ef16SKarl Rupp 131e4a0ef16SKarl Rupp /* Copy data back from GPU */ 1329371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> row_buffer; 1339371c9d4SSatish Balay row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 134e4a0ef16SKarl Rupp 135e4a0ef16SKarl Rupp // copy row array 136e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 137e4a0ef16SKarl Rupp (a->i)[0] = row_buffer[0]; 138e4a0ef16SKarl Rupp for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 139e4a0ef16SKarl Rupp (a->i)[i + 1] = row_buffer[i + 1]; 140e4a0ef16SKarl Rupp a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 141e4a0ef16SKarl Rupp } 142e4a0ef16SKarl Rupp 143e4a0ef16SKarl Rupp // copy column indices 1449371c9d4SSatish Balay viennacl::backend::typesafe_host_array<unsigned int> col_buffer; 1459371c9d4SSatish Balay col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 146e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 1479371c9d4SSatish Balay for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i]; 148e4a0ef16SKarl Rupp 149e4a0ef16SKarl Rupp // copy nonzero entries directly to destination (no conversion required) 150e4a0ef16SKarl Rupp viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 151e4a0ef16SKarl Rupp 1529566063dSJacob Faibussowitsch PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar)))); 1534cf1874eSKarl Rupp ViennaCLWaitForGPU(); 154023073b3SKarl Rupp /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 155d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 156d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 157d71ae5a4SJacob Faibussowitsch } 158c70f7ee4SJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) { 1593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 160f38c1e66SStefano Zampini } else { 1613ba16761SJacob Faibussowitsch if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS); 162e4a0ef16SKarl Rupp 1632c71b3e2SJacob Faibussowitsch PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 164f38c1e66SStefano Zampini if (!Agpu) { 165f38c1e66SStefano Zampini viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a); 166f38c1e66SStefano Zampini } else { 167f38c1e66SStefano Zampini viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a); 168f38c1e66SStefano Zampini } 169f38c1e66SStefano Zampini } 170c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 171b8ced49eSKarl Rupp /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */ 1729566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1739566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 175e4a0ef16SKarl Rupp } 176e4a0ef16SKarl Rupp 177d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) 178d71ae5a4SJacob Faibussowitsch { 179e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 180e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 1810d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL; 1820d73d530SKarl Rupp ViennaCLVector *ygpu = NULL; 183e4a0ef16SKarl Rupp 184e4a0ef16SKarl Rupp PetscFunctionBegin; 185837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 1869566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 187bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 1889566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 1899566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu)); 1909566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 191e4a0ef16SKarl Rupp try { 192b7832b47SStefano Zampini if (a->compressedrow.use) { 193b7832b47SStefano Zampini *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 194b7832b47SStefano Zampini } else { 195e4a0ef16SKarl Rupp *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 196b7832b47SStefano Zampini } 1974cf1874eSKarl Rupp ViennaCLWaitForGPU(); 198d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 199d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 200d71ae5a4SJacob Faibussowitsch } 2019566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 2029566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 2039566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu)); 2049566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt)); 205bf1781e8SStefano Zampini } else { 2069566063dSJacob Faibussowitsch PetscCall(VecSet_SeqViennaCL(yy, 0)); 20767c87b7fSKarl Rupp } 2083ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 209e4a0ef16SKarl Rupp } 210e4a0ef16SKarl Rupp 211d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) 212d71ae5a4SJacob Faibussowitsch { 213e4a0ef16SKarl Rupp Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 214e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr; 2150d73d530SKarl Rupp const ViennaCLVector *xgpu = NULL, *ygpu = NULL; 2160d73d530SKarl Rupp ViennaCLVector *zgpu = NULL; 217e4a0ef16SKarl Rupp 218e4a0ef16SKarl Rupp PetscFunctionBegin; 219837a59e1SRichard Tran Mills /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */ 2209566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyToGPU(A)); 221bf1781e8SStefano Zampini if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { 222e4a0ef16SKarl Rupp try { 2239566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(xx, &xgpu)); 2249566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayRead(yy, &ygpu)); 2259566063dSJacob Faibussowitsch PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu)); 2269566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeBegin()); 227f38c1e66SStefano Zampini if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu); 228f38c1e66SStefano Zampini else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 229f38c1e66SStefano Zampini if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec; 230f38c1e66SStefano Zampini else *zgpu += *viennaclstruct->tempvec; 2314cf1874eSKarl Rupp ViennaCLWaitForGPU(); 2329566063dSJacob Faibussowitsch PetscCall(PetscLogGpuTimeEnd()); 2339566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu)); 2349566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu)); 2359566063dSJacob Faibussowitsch PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu)); 236e4a0ef16SKarl Rupp 237d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 238d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 239d71ae5a4SJacob Faibussowitsch } 2409566063dSJacob Faibussowitsch PetscCall(PetscLogGpuFlops(2.0 * a->nz)); 241bf1781e8SStefano Zampini } else { 2429566063dSJacob Faibussowitsch PetscCall(VecCopy_SeqViennaCL(yy, zz)); 24367c87b7fSKarl Rupp } 2443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 245e4a0ef16SKarl Rupp } 246e4a0ef16SKarl Rupp 247d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) 248d71ae5a4SJacob Faibussowitsch { 249e4a0ef16SKarl Rupp PetscFunctionBegin; 2509566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 2513ba16761SJacob Faibussowitsch if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2529566063dSJacob Faibussowitsch if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A)); 2533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 254e4a0ef16SKarl Rupp } 255e4a0ef16SKarl Rupp 256e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/ 257cab5ea25SPierre Jolivet /*@C 25811a5261eSBarry Smith MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format 25919fddfadSKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 260*20f4b53cSBarry Smith to GPUs and use the ViennaCL library for calculations. 261e4a0ef16SKarl Rupp 262d083f849SBarry Smith Collective 263e4a0ef16SKarl Rupp 264e4a0ef16SKarl Rupp Input Parameters: 26511a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 266e4a0ef16SKarl Rupp . m - number of rows 267e4a0ef16SKarl Rupp . n - number of columns 268*20f4b53cSBarry Smith . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is set 269*20f4b53cSBarry Smith - nnz - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL` 270e4a0ef16SKarl Rupp 271e4a0ef16SKarl Rupp Output Parameter: 272e4a0ef16SKarl Rupp . A - the matrix 273e4a0ef16SKarl Rupp 27411a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 275e4a0ef16SKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 27611a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 277e4a0ef16SKarl Rupp 278e4a0ef16SKarl Rupp Notes: 27911a5261eSBarry Smith The AIJ format, also called 2802ef1f0ffSBarry Smith compressed row storage, is fully compatible with standard Fortran 281e4a0ef16SKarl Rupp storage. That is, the stored row and column indices can begin at 282*20f4b53cSBarry Smith either one (as in Fortran) or zero. 283e4a0ef16SKarl Rupp 284*20f4b53cSBarry Smith Specify the preallocated storage with either `nz` or `nnz` (not both). 285*20f4b53cSBarry Smith Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 286*20f4b53cSBarry Smith allocation. 287e4a0ef16SKarl Rupp 288e4a0ef16SKarl Rupp Level: intermediate 289e4a0ef16SKarl Rupp 29011a5261eSBarry Smith .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()` 291e4a0ef16SKarl Rupp @*/ 292d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 293d71ae5a4SJacob Faibussowitsch { 294e4a0ef16SKarl Rupp PetscFunctionBegin; 2959566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 2969566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, m, n)); 2979566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 2989566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz)); 2993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 300e4a0ef16SKarl Rupp } 301e4a0ef16SKarl Rupp 302d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 303d71ae5a4SJacob Faibussowitsch { 304e4a0ef16SKarl Rupp Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr; 305e4a0ef16SKarl Rupp 306e4a0ef16SKarl Rupp PetscFunctionBegin; 307e4a0ef16SKarl Rupp try { 3086447cd05SKarl Rupp if (viennaclcontainer) { 3096447cd05SKarl Rupp delete viennaclcontainer->tempvec; 3106447cd05SKarl Rupp delete viennaclcontainer->mat; 3116447cd05SKarl Rupp delete viennaclcontainer->compressed_mat; 312e4a0ef16SKarl Rupp delete viennaclcontainer; 3136447cd05SKarl Rupp } 314c70f7ee4SJunchao Zhang A->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 315d71ae5a4SJacob Faibussowitsch } catch (std::exception const &ex) { 316d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 317d71ae5a4SJacob Faibussowitsch } 3188713a8baSPatrick Sanan 3199566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL)); 3209566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL)); 3219566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL)); 3228713a8baSPatrick Sanan 323e4a0ef16SKarl Rupp /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 324e4a0ef16SKarl Rupp A->spptr = 0; 3259566063dSJacob Faibussowitsch PetscCall(MatDestroy_SeqAIJ(A)); 3263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 327e4a0ef16SKarl Rupp } 328e4a0ef16SKarl Rupp 329d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 330d71ae5a4SJacob Faibussowitsch { 331e4a0ef16SKarl Rupp PetscFunctionBegin; 3329566063dSJacob Faibussowitsch PetscCall(MatCreate_SeqAIJ(B)); 3339566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B)); 3343ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 3358713a8baSPatrick Sanan } 3368713a8baSPatrick Sanan 337b470e4b4SRichard Tran Mills static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool); 338d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) 339d71ae5a4SJacob Faibussowitsch { 340c3cca76eSKarl Rupp Mat C; 341c3cca76eSKarl Rupp 342c3cca76eSKarl Rupp PetscFunctionBegin; 3439566063dSJacob Faibussowitsch PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B)); 344c3cca76eSKarl Rupp C = *B; 345c3cca76eSKarl Rupp 3469566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 34792f9df4aSRichard Tran Mills C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 348c3cca76eSKarl Rupp 349c3cca76eSKarl Rupp C->spptr = new Mat_SeqAIJViennaCL(); 350c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec = NULL; 351c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->mat = NULL; 352c3cca76eSKarl Rupp ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL; 353c3cca76eSKarl Rupp 3549566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL)); 355c3cca76eSKarl Rupp 356c70f7ee4SJunchao Zhang C->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 357c3cca76eSKarl Rupp 358c3cca76eSKarl Rupp /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 3599566063dSJacob Faibussowitsch if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C)); 3603ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 361c3cca76eSKarl Rupp } 362c3cca76eSKarl Rupp 363d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 364d71ae5a4SJacob Faibussowitsch { 365f38c1e66SStefano Zampini PetscFunctionBegin; 3669566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 367c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 369f38c1e66SStefano Zampini } 370f38c1e66SStefano Zampini 371d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 372d71ae5a4SJacob Faibussowitsch { 373f38c1e66SStefano Zampini PetscFunctionBegin; 374c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 375c1a4f3baSJunchao Zhang *array = NULL; 3763ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 377c1a4f3baSJunchao Zhang } 378c1a4f3baSJunchao Zhang 379d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 380d71ae5a4SJacob Faibussowitsch { 381c1a4f3baSJunchao Zhang PetscFunctionBegin; 3829566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 383c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 385c1a4f3baSJunchao Zhang } 386c1a4f3baSJunchao Zhang 387d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) 388d71ae5a4SJacob Faibussowitsch { 389c1a4f3baSJunchao Zhang PetscFunctionBegin; 390c1a4f3baSJunchao Zhang *array = NULL; 391c1a4f3baSJunchao Zhang /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */ 3923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 393c1a4f3baSJunchao Zhang } 394c1a4f3baSJunchao Zhang 395d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 396d71ae5a4SJacob Faibussowitsch { 397c1a4f3baSJunchao Zhang PetscFunctionBegin; 398c1a4f3baSJunchao Zhang *array = ((Mat_SeqAIJ *)A->data)->a; 3993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 400c1a4f3baSJunchao Zhang } 401c1a4f3baSJunchao Zhang 402d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) 403d71ae5a4SJacob Faibussowitsch { 404c1a4f3baSJunchao Zhang PetscFunctionBegin; 405c1a4f3baSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 406f38c1e66SStefano Zampini *array = NULL; 4073ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 408f38c1e66SStefano Zampini } 409f38c1e66SStefano Zampini 410d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) 411d71ae5a4SJacob Faibussowitsch { 412c1a4f3baSJunchao Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 413f38c1e66SStefano Zampini 414e7e92044SBarry Smith PetscFunctionBegin; 415b470e4b4SRichard Tran Mills A->boundtocpu = flg; 416c1a4f3baSJunchao Zhang if (flg && a->inode.size) { 417c1a4f3baSJunchao Zhang a->inode.use = PETSC_TRUE; 418ea500dcfSRichard Tran Mills } else { 419c1a4f3baSJunchao Zhang a->inode.use = PETSC_FALSE; 420ea500dcfSRichard Tran Mills } 421e7e92044SBarry Smith if (flg) { 422f38c1e66SStefano Zampini /* make sure we have an up-to-date copy on the CPU */ 4239566063dSJacob Faibussowitsch PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL)); 424e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJ; 425e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJ; 426e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 427e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJ; 4289566063dSJacob Faibussowitsch PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps))); 429e7e92044SBarry Smith } else { 430e7e92044SBarry Smith A->ops->mult = MatMult_SeqAIJViennaCL; 431e7e92044SBarry Smith A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 432e7e92044SBarry Smith A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 433e7e92044SBarry Smith A->ops->destroy = MatDestroy_SeqAIJViennaCL; 434e7e92044SBarry Smith A->ops->duplicate = MatDuplicate_SeqAIJViennaCL; 435c1a4f3baSJunchao Zhang 436c1a4f3baSJunchao Zhang a->ops->getarray = MatSeqAIJGetArray_SeqAIJViennaCL; 437c1a4f3baSJunchao Zhang a->ops->restorearray = MatSeqAIJRestoreArray_SeqAIJViennaCL; 438c1a4f3baSJunchao Zhang a->ops->getarrayread = MatSeqAIJGetArrayRead_SeqAIJViennaCL; 439c1a4f3baSJunchao Zhang a->ops->restorearrayread = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL; 440c1a4f3baSJunchao Zhang a->ops->getarraywrite = MatSeqAIJGetArrayWrite_SeqAIJViennaCL; 441c1a4f3baSJunchao Zhang a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL; 442e7e92044SBarry Smith } 4433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 444e7e92044SBarry Smith } 445e7e92044SBarry Smith 446d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 447d71ae5a4SJacob Faibussowitsch { 4488713a8baSPatrick Sanan Mat B; 4498713a8baSPatrick Sanan 4508713a8baSPatrick Sanan PetscFunctionBegin; 4515f80ce2aSJacob Faibussowitsch PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead"); 4528713a8baSPatrick Sanan 4539566063dSJacob Faibussowitsch if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat)); 4548713a8baSPatrick Sanan 4558713a8baSPatrick Sanan B = *newmat; 4568713a8baSPatrick Sanan 457e4a0ef16SKarl Rupp B->spptr = new Mat_SeqAIJViennaCL(); 458e4a0ef16SKarl Rupp 459a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec = NULL; 460a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->mat = NULL; 461a3430c56SKarl Rupp ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL; 462e4a0ef16SKarl Rupp 4639566063dSJacob Faibussowitsch PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE)); 46492f9df4aSRichard Tran Mills A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL; 465e4a0ef16SKarl Rupp 4669566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL)); 4679566063dSJacob Faibussowitsch PetscCall(PetscFree(B->defaultvectype)); 4689566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype)); 469e4a0ef16SKarl Rupp 4709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL)); 4719566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense)); 4729566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ)); 4738713a8baSPatrick Sanan 474c70f7ee4SJunchao Zhang B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 4758713a8baSPatrick Sanan 4768713a8baSPatrick Sanan /* If the source matrix is already assembled, copy the destination matrix to the GPU */ 4779566063dSJacob Faibussowitsch if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B)); 4783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 479e4a0ef16SKarl Rupp } 480e4a0ef16SKarl Rupp 4813ca39a21SBarry Smith /*MC 482e4a0ef16SKarl Rupp MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 483e4a0ef16SKarl Rupp 484e4a0ef16SKarl Rupp A matrix type type whose data resides on GPUs. These matrices are in CSR format by 485e4a0ef16SKarl Rupp default. All matrix calculations are performed using the ViennaCL library. 486e4a0ef16SKarl Rupp 487e4a0ef16SKarl Rupp Options Database Keys: 48811a5261eSBarry Smith + -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions() 48911a5261eSBarry Smith . -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 49011a5261eSBarry Smith - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`. 491e4a0ef16SKarl Rupp 492e4a0ef16SKarl Rupp Level: beginner 493e4a0ef16SKarl Rupp 494db781477SPatrick Sanan .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()` 495e4a0ef16SKarl Rupp M*/ 496e4a0ef16SKarl Rupp 497d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) 498d71ae5a4SJacob Faibussowitsch { 49972367587SKarl Rupp PetscFunctionBegin; 5009566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc)); 5019566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc)); 5029566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc)); 5039566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc)); 5043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 50572367587SKarl Rupp } 506