199acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 299acd6aaSStefano Zampini 3aaa7dc30SBarry Smith #include <petscconf.h> 48f86e40fSKarl Rupp #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 565e3cb35SKarl Rupp #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 68f86e40fSKarl Rupp 7*ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJViennaCL(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 8d71ae5a4SJacob Faibussowitsch { 98f86e40fSKarl Rupp Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 108f86e40fSKarl Rupp 118f86e40fSKarl Rupp PetscFunctionBegin; 129566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 139566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 148f86e40fSKarl Rupp if (!B->preallocated) { 158f86e40fSKarl Rupp /* Explicitly create the two MATSEQAIJVIENNACL matrices. */ 169566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 179566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 189566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQAIJVIENNACL)); 199566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 209566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N)); 219566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQAIJVIENNACL)); 228f86e40fSKarl Rupp } 239566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 249566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 258f86e40fSKarl Rupp B->preallocated = PETSC_TRUE; 263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 278f86e40fSKarl Rupp } 288f86e40fSKarl Rupp 29*ba38deedSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_MPIAIJViennaCL(Mat A, MatAssemblyType mode) 30d71ae5a4SJacob Faibussowitsch { 3174fd8ad3SBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ *)A->data; 3274fd8ad3SBarry Smith PetscBool v; 3374fd8ad3SBarry Smith 3474fd8ad3SBarry Smith PetscFunctionBegin; 359566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_MPIAIJ(A, mode)); 369566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)b->lvec, VECSEQVIENNACL, &v)); 3774fd8ad3SBarry Smith if (!v) { 3874fd8ad3SBarry Smith PetscInt m; 399566063dSJacob Faibussowitsch PetscCall(VecGetSize(b->lvec, &m)); 409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 419566063dSJacob Faibussowitsch PetscCall(VecCreateSeqViennaCL(PETSC_COMM_SELF, m, &b->lvec)); 4274fd8ad3SBarry Smith } 433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4474fd8ad3SBarry Smith } 458f86e40fSKarl Rupp 46d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJViennaCL(Mat A) 47d71ae5a4SJacob Faibussowitsch { 488f86e40fSKarl Rupp PetscFunctionBegin; 499566063dSJacob Faibussowitsch PetscCall(MatCreate_MPIAIJ(A)); 506f3d89d0SStefano Zampini A->boundtocpu = PETSC_FALSE; 519566063dSJacob Faibussowitsch PetscCall(PetscFree(A->defaultvectype)); 529566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECVIENNACL, &A->defaultvectype)); 539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJViennaCL)); 5474fd8ad3SBarry Smith A->ops->assemblyend = MatAssemblyEnd_MPIAIJViennaCL; 559566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJVIENNACL)); 563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 578f86e40fSKarl Rupp } 588f86e40fSKarl Rupp 59cab5ea25SPierre Jolivet /*@C 6011a5261eSBarry Smith MatCreateAIJViennaCL - Creates a sparse matrix in `MATAIJ` (compressed row) format 61023073b3SKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 6220f4b53cSBarry Smith to GPUs and use the ViennaCL library for calculations. 638f86e40fSKarl Rupp 64d083f849SBarry Smith Collective 658f86e40fSKarl Rupp 668f86e40fSKarl Rupp Input Parameters: 6711a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 6820f4b53cSBarry Smith . m - number of rows, or `PETSC_DECIDE` if `M` is provided 6920f4b53cSBarry Smith . n - number of columns, or `PETSC_DECIDE` if `N` is provided 7020f4b53cSBarry Smith . M - number of global rows in the matrix, or `PETSC_DETERMINE` if `m` is provided 7120f4b53cSBarry Smith . N - number of global columns in the matrix, or `PETSC_DETERMINE` if `n` is provided 7220f4b53cSBarry Smith . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 7320f4b53cSBarry Smith (same value is used for all local rows) 7420f4b53cSBarry Smith . d_nnz - array containing the number of nonzeros in the various rows of the 7520f4b53cSBarry Smith DIAGONAL portion of the local submatrix (possibly different for each row) 7620f4b53cSBarry Smith or `NULL`, if `d_nz` is used to specify the nonzero structure. 7720f4b53cSBarry Smith The size of this array is equal to the number of local rows, i.e `m`. 7820f4b53cSBarry Smith For matrices you plan to factor you must leave room for the diagonal entry and 7920f4b53cSBarry Smith put in the entry even if it is zero. 8020f4b53cSBarry Smith . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 8120f4b53cSBarry Smith submatrix (same value is used for all local rows). 8220f4b53cSBarry Smith - o_nnz - array containing the number of nonzeros in the various rows of the 8320f4b53cSBarry Smith OFF-DIAGONAL portion of the local submatrix (possibly different for 8420f4b53cSBarry Smith each row) or `NULL`, if `o_nz` is used to specify the nonzero 8520f4b53cSBarry Smith structure. The size of this array is equal to the number 8620f4b53cSBarry Smith of local rows, i.e `m`. 878f86e40fSKarl Rupp 888f86e40fSKarl Rupp Output Parameter: 898f86e40fSKarl Rupp . A - the matrix 908f86e40fSKarl Rupp 9120f4b53cSBarry Smith Level: intermediate 9220f4b53cSBarry Smith 9320f4b53cSBarry Smith Notes: 9411a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 958f86e40fSKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 9611a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 978f86e40fSKarl Rupp 9811a5261eSBarry Smith The AIJ format, also called 992ef1f0ffSBarry Smith compressed row storage), is fully compatible with standard Fortran 1008f86e40fSKarl Rupp storage. That is, the stored row and column indices can begin at 10120f4b53cSBarry Smith either one (as in Fortran) or zero. 1028f86e40fSKarl Rupp 10320f4b53cSBarry Smith .seealso: `Mat`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, 104fe59aa6dSJacob Faibussowitsch `MATMPIAIJVIENNACL`, `MATAIJVIENNACL` 1058f86e40fSKarl Rupp @*/ 106d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 107d71ae5a4SJacob Faibussowitsch { 1088f86e40fSKarl Rupp PetscMPIInt size; 1098f86e40fSKarl Rupp 1108f86e40fSKarl Rupp PetscFunctionBegin; 1119566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 1129566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1148f86e40fSKarl Rupp if (size > 1) { 1159566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPIAIJVIENNACL)); 1169566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 1178f86e40fSKarl Rupp } else { 1189566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQAIJVIENNACL)); 1199566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 1208f86e40fSKarl Rupp } 1213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1228f86e40fSKarl Rupp } 1238f86e40fSKarl Rupp 1243ca39a21SBarry Smith /*MC 1258f86e40fSKarl Rupp MATAIJVIENNACL - MATMPIAIJVIENNACL= "aijviennacl" = "mpiaijviennacl" - A matrix type to be used for sparse matrices. 1268f86e40fSKarl Rupp 1278f86e40fSKarl Rupp A matrix type (CSR format) whose data resides on GPUs. 1288f86e40fSKarl Rupp All matrix calculations are performed using the ViennaCL library. 1298f86e40fSKarl Rupp 13011a5261eSBarry Smith This matrix type is identical to `MATSEQAIJVIENNACL` when constructed with a single process communicator, 13111a5261eSBarry Smith and `MATMPIAIJVIENNACL` otherwise. As a result, for single process communicators, 13211a5261eSBarry Smith `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 1338f86e40fSKarl Rupp for communicators controlling multiple processes. It is recommended that you call both of 1348f86e40fSKarl Rupp the above preallocation routines for simplicity. 1358f86e40fSKarl Rupp 1368f86e40fSKarl Rupp Options Database Keys: 13711a5261eSBarry Smith . -mat_type mpiaijviennacl - sets the matrix type to `MATAIJVIENNACL` during a call to `MatSetFromOptions()` 1388f86e40fSKarl Rupp 1398f86e40fSKarl Rupp Level: beginner 1408f86e40fSKarl Rupp 14120f4b53cSBarry Smith .seealso: `Mat`, `MatType`, `MatCreateAIJViennaCL()`, `MATSEQAIJVIENNACL`, `MatCreateSeqAIJVIENNACL()` 1428f86e40fSKarl Rupp M*/ 143