xref: /petsc/src/mat/impls/aij/mpi/mpiviennacl/mpiaijviennacl.cxx (revision 11a5261e40035b7c793f2783a2ba6c7cd4f3b077)
199acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
299acd6aaSStefano Zampini 
3aaa7dc30SBarry Smith #include <petscconf.h>
48f86e40fSKarl Rupp #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
565e3cb35SKarl Rupp #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
68f86e40fSKarl Rupp 
79371c9d4SSatish Balay PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJViennaCL(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
88f86e40fSKarl Rupp   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
98f86e40fSKarl Rupp 
108f86e40fSKarl Rupp   PetscFunctionBegin;
119566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
129566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
138f86e40fSKarl Rupp   if (!B->preallocated) {
148f86e40fSKarl Rupp     /* Explicitly create the two MATSEQAIJVIENNACL matrices. */
159566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
169566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
179566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQAIJVIENNACL));
189566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
199566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
209566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->B, B->rmap->n, B->cmap->N, B->rmap->n, B->cmap->N));
219566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->B, MATSEQAIJVIENNACL));
229566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));
238f86e40fSKarl Rupp   }
249566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
259566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
268f86e40fSKarl Rupp   B->preallocated = PETSC_TRUE;
278f86e40fSKarl Rupp   PetscFunctionReturn(0);
288f86e40fSKarl Rupp }
298f86e40fSKarl Rupp 
309371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIAIJViennaCL(Mat A, MatAssemblyType mode) {
3174fd8ad3SBarry Smith   Mat_MPIAIJ *b = (Mat_MPIAIJ *)A->data;
3274fd8ad3SBarry Smith   PetscBool   v;
3374fd8ad3SBarry Smith 
3474fd8ad3SBarry Smith   PetscFunctionBegin;
359566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_MPIAIJ(A, mode));
369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)b->lvec, VECSEQVIENNACL, &v));
3774fd8ad3SBarry Smith   if (!v) {
3874fd8ad3SBarry Smith     PetscInt m;
399566063dSJacob Faibussowitsch     PetscCall(VecGetSize(b->lvec, &m));
409566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&b->lvec));
419566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqViennaCL(PETSC_COMM_SELF, m, &b->lvec));
4274fd8ad3SBarry Smith   }
4374fd8ad3SBarry Smith   PetscFunctionReturn(0);
4474fd8ad3SBarry Smith }
458f86e40fSKarl Rupp 
469371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIAIJViennaCL(Mat A) {
478f86e40fSKarl Rupp   PetscFunctionBegin;
489566063dSJacob Faibussowitsch   PetscCall(MatDestroy_MPIAIJ(A));
498f86e40fSKarl Rupp   PetscFunctionReturn(0);
508f86e40fSKarl Rupp }
518f86e40fSKarl Rupp 
529371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJViennaCL(Mat A) {
538f86e40fSKarl Rupp   PetscFunctionBegin;
549566063dSJacob Faibussowitsch   PetscCall(MatCreate_MPIAIJ(A));
556f3d89d0SStefano Zampini   A->boundtocpu = PETSC_FALSE;
569566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->defaultvectype));
579566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(VECVIENNACL, &A->defaultvectype));
589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJViennaCL));
5974fd8ad3SBarry Smith   A->ops->assemblyend = MatAssemblyEnd_MPIAIJViennaCL;
609566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIAIJVIENNACL));
618f86e40fSKarl Rupp   PetscFunctionReturn(0);
628f86e40fSKarl Rupp }
638f86e40fSKarl Rupp 
64cab5ea25SPierre Jolivet /*@C
65*11a5261eSBarry Smith    MatCreateAIJViennaCL - Creates a sparse matrix in `MATAIJ` (compressed row) format
66023073b3SKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
678f86e40fSKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
688f86e40fSKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
698f86e40fSKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
708f86e40fSKarl Rupp    performance during matrix assembly can be increased substantially.
718f86e40fSKarl Rupp 
72d083f849SBarry Smith    Collective
738f86e40fSKarl Rupp 
748f86e40fSKarl Rupp    Input Parameters:
75*11a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
768f86e40fSKarl Rupp .  m - number of rows
778f86e40fSKarl Rupp .  n - number of columns
788f86e40fSKarl Rupp .  nz - number of nonzeros per row (same for all rows)
798f86e40fSKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
808f86e40fSKarl Rupp          (possibly different for each row) or NULL
818f86e40fSKarl Rupp 
828f86e40fSKarl Rupp    Output Parameter:
838f86e40fSKarl Rupp .  A - the matrix
848f86e40fSKarl Rupp 
85*11a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
868f86e40fSKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
87*11a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
888f86e40fSKarl Rupp 
898f86e40fSKarl Rupp    Notes:
908f86e40fSKarl Rupp    If nnz is given then nz is ignored
918f86e40fSKarl Rupp 
92*11a5261eSBarry Smith    The AIJ format, also called
938f86e40fSKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
948f86e40fSKarl Rupp    storage.  That is, the stored row and column indices can begin at
958f86e40fSKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
968f86e40fSKarl Rupp 
978f86e40fSKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
98*11a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
998f86e40fSKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
1008f86e40fSKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
1018f86e40fSKarl Rupp 
1028f86e40fSKarl Rupp    Level: intermediate
1038f86e40fSKarl Rupp 
104db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()`, `MATMPIAIJVIENNACL`, `MATAIJVIENNACL`
1058f86e40fSKarl Rupp @*/
1069371c9d4SSatish Balay PetscErrorCode MatCreateAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
1078f86e40fSKarl Rupp   PetscMPIInt size;
1088f86e40fSKarl Rupp 
1098f86e40fSKarl Rupp   PetscFunctionBegin;
1109566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
1119566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1129566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1138f86e40fSKarl Rupp   if (size > 1) {
1149566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIAIJVIENNACL));
1159566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
1168f86e40fSKarl Rupp   } else {
1179566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
1189566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
1198f86e40fSKarl Rupp   }
1208f86e40fSKarl Rupp   PetscFunctionReturn(0);
1218f86e40fSKarl Rupp }
1228f86e40fSKarl Rupp 
1233ca39a21SBarry Smith /*MC
1248f86e40fSKarl Rupp    MATAIJVIENNACL - MATMPIAIJVIENNACL= "aijviennacl" = "mpiaijviennacl" - A matrix type to be used for sparse matrices.
1258f86e40fSKarl Rupp 
1268f86e40fSKarl Rupp    A matrix type (CSR format) whose data resides on GPUs.
1278f86e40fSKarl Rupp    All matrix calculations are performed using the ViennaCL library.
1288f86e40fSKarl Rupp 
129*11a5261eSBarry Smith    This matrix type is identical to `MATSEQAIJVIENNACL` when constructed with a single process communicator,
130*11a5261eSBarry Smith    and `MATMPIAIJVIENNACL` otherwise.  As a result, for single process communicators,
131*11a5261eSBarry Smith    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
1328f86e40fSKarl Rupp    for communicators controlling multiple processes.  It is recommended that you call both of
1338f86e40fSKarl Rupp    the above preallocation routines for simplicity.
1348f86e40fSKarl Rupp 
1358f86e40fSKarl Rupp    Options Database Keys:
136*11a5261eSBarry Smith .  -mat_type mpiaijviennacl - sets the matrix type to `MATAIJVIENNACL` during a call to `MatSetFromOptions()`
1378f86e40fSKarl Rupp 
1388f86e40fSKarl Rupp   Level: beginner
1398f86e40fSKarl Rupp 
140db781477SPatrick Sanan  .seealso: `MatCreateAIJViennaCL()`, `MATSEQAIJVIENNACL`, `MatCreateSeqAIJVIENNACL()`
1418f86e40fSKarl Rupp M*/
142