199acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1 299acd6aaSStefano Zampini 3aaa7dc30SBarry Smith #include <petscconf.h> 48f86e40fSKarl Rupp #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 565e3cb35SKarl Rupp #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 68f86e40fSKarl Rupp 78f86e40fSKarl Rupp PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJViennaCL(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 88f86e40fSKarl Rupp { 98f86e40fSKarl Rupp Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 108f86e40fSKarl Rupp 118f86e40fSKarl Rupp PetscFunctionBegin; 12*9566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 13*9566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 148f86e40fSKarl Rupp if (!B->preallocated) { 158f86e40fSKarl Rupp /* Explicitly create the two MATSEQAIJVIENNACL matrices. */ 16*9566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 17*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 18*9566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A,MATSEQAIJVIENNACL)); 19*9566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 20*9566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 21*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N)); 22*9566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B,MATSEQAIJVIENNACL)); 23*9566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 248f86e40fSKarl Rupp } 25*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 26*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 278f86e40fSKarl Rupp B->preallocated = PETSC_TRUE; 288f86e40fSKarl Rupp PetscFunctionReturn(0); 298f86e40fSKarl Rupp } 308f86e40fSKarl Rupp 3174fd8ad3SBarry Smith PetscErrorCode MatAssemblyEnd_MPIAIJViennaCL(Mat A,MatAssemblyType mode) 3274fd8ad3SBarry Smith { 3374fd8ad3SBarry Smith Mat_MPIAIJ *b = (Mat_MPIAIJ*)A->data; 3474fd8ad3SBarry Smith PetscBool v; 3574fd8ad3SBarry Smith 3674fd8ad3SBarry Smith PetscFunctionBegin; 37*9566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd_MPIAIJ(A,mode)); 38*9566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)b->lvec,VECSEQVIENNACL,&v)); 3974fd8ad3SBarry Smith if (!v) { 4074fd8ad3SBarry Smith PetscInt m; 41*9566063dSJacob Faibussowitsch PetscCall(VecGetSize(b->lvec,&m)); 42*9566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 43*9566063dSJacob Faibussowitsch PetscCall(VecCreateSeqViennaCL(PETSC_COMM_SELF,m,&b->lvec)); 4474fd8ad3SBarry Smith } 4574fd8ad3SBarry Smith PetscFunctionReturn(0); 4674fd8ad3SBarry Smith } 478f86e40fSKarl Rupp 488f86e40fSKarl Rupp PetscErrorCode MatDestroy_MPIAIJViennaCL(Mat A) 498f86e40fSKarl Rupp { 508f86e40fSKarl Rupp PetscFunctionBegin; 51*9566063dSJacob Faibussowitsch PetscCall(MatDestroy_MPIAIJ(A)); 528f86e40fSKarl Rupp PetscFunctionReturn(0); 538f86e40fSKarl Rupp } 548f86e40fSKarl Rupp 558f86e40fSKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJViennaCL(Mat A) 568f86e40fSKarl Rupp { 578f86e40fSKarl Rupp PetscFunctionBegin; 58*9566063dSJacob Faibussowitsch PetscCall(MatCreate_MPIAIJ(A)); 596f3d89d0SStefano Zampini A->boundtocpu = PETSC_FALSE; 60*9566063dSJacob Faibussowitsch PetscCall(PetscFree(A->defaultvectype)); 61*9566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(VECVIENNACL,&A->defaultvectype)); 62*9566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJViennaCL)); 6374fd8ad3SBarry Smith A->ops->assemblyend = MatAssemblyEnd_MPIAIJViennaCL; 64*9566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJVIENNACL)); 658f86e40fSKarl Rupp PetscFunctionReturn(0); 668f86e40fSKarl Rupp } 678f86e40fSKarl Rupp 68cab5ea25SPierre Jolivet /*@C 698f86e40fSKarl Rupp MatCreateAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 70023073b3SKarl Rupp (the default parallel PETSc format). This matrix will ultimately be pushed down 718f86e40fSKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 728f86e40fSKarl Rupp assembly performance the user should preallocate the matrix storage by setting 738f86e40fSKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 748f86e40fSKarl Rupp performance during matrix assembly can be increased substantially. 758f86e40fSKarl Rupp 76d083f849SBarry Smith Collective 778f86e40fSKarl Rupp 788f86e40fSKarl Rupp Input Parameters: 798f86e40fSKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 808f86e40fSKarl Rupp . m - number of rows 818f86e40fSKarl Rupp . n - number of columns 828f86e40fSKarl Rupp . nz - number of nonzeros per row (same for all rows) 838f86e40fSKarl Rupp - nnz - array containing the number of nonzeros in the various rows 848f86e40fSKarl Rupp (possibly different for each row) or NULL 858f86e40fSKarl Rupp 868f86e40fSKarl Rupp Output Parameter: 878f86e40fSKarl Rupp . A - the matrix 888f86e40fSKarl Rupp 898f86e40fSKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 908f86e40fSKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 918f86e40fSKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 928f86e40fSKarl Rupp 938f86e40fSKarl Rupp Notes: 948f86e40fSKarl Rupp If nnz is given then nz is ignored 958f86e40fSKarl Rupp 968f86e40fSKarl Rupp The AIJ format (also called the Yale sparse matrix format or 978f86e40fSKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 988f86e40fSKarl Rupp storage. That is, the stored row and column indices can begin at 998f86e40fSKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 1008f86e40fSKarl Rupp 1018f86e40fSKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 1028f86e40fSKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 1038f86e40fSKarl Rupp allocation. For large problems you MUST preallocate memory or you 1048f86e40fSKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 1058f86e40fSKarl Rupp 1068f86e40fSKarl Rupp Level: intermediate 1078f86e40fSKarl Rupp 108e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJVIENNACL, MATAIJVIENNACL 1098f86e40fSKarl Rupp @*/ 1108f86e40fSKarl Rupp PetscErrorCode MatCreateAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1118f86e40fSKarl Rupp { 1128f86e40fSKarl Rupp PetscMPIInt size; 1138f86e40fSKarl Rupp 1148f86e40fSKarl Rupp PetscFunctionBegin; 115*9566063dSJacob Faibussowitsch PetscCall(MatCreate(comm,A)); 116*9566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A,m,n,M,N)); 117*9566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm,&size)); 1188f86e40fSKarl Rupp if (size > 1) { 119*9566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATMPIAIJVIENNACL)); 120*9566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 1218f86e40fSKarl Rupp } else { 122*9566063dSJacob Faibussowitsch PetscCall(MatSetType(*A,MATSEQAIJVIENNACL)); 123*9566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 1248f86e40fSKarl Rupp } 1258f86e40fSKarl Rupp PetscFunctionReturn(0); 1268f86e40fSKarl Rupp } 1278f86e40fSKarl Rupp 1283ca39a21SBarry Smith /*MC 1298f86e40fSKarl Rupp MATAIJVIENNACL - MATMPIAIJVIENNACL= "aijviennacl" = "mpiaijviennacl" - A matrix type to be used for sparse matrices. 1308f86e40fSKarl Rupp 1318f86e40fSKarl Rupp A matrix type (CSR format) whose data resides on GPUs. 1328f86e40fSKarl Rupp All matrix calculations are performed using the ViennaCL library. 1338f86e40fSKarl Rupp 1348f86e40fSKarl Rupp This matrix type is identical to MATSEQAIJVIENNACL when constructed with a single process communicator, 1358f86e40fSKarl Rupp and MATMPIAIJVIENNACL otherwise. As a result, for single process communicators, 1368f86e40fSKarl Rupp MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1378f86e40fSKarl Rupp for communicators controlling multiple processes. It is recommended that you call both of 1388f86e40fSKarl Rupp the above preallocation routines for simplicity. 1398f86e40fSKarl Rupp 1408f86e40fSKarl Rupp Options Database Keys: 141a2b725a8SWilliam Gropp . -mat_type mpiaijviennacl - sets the matrix type to "mpiaijviennacl" during a call to MatSetFromOptions() 1428f86e40fSKarl Rupp 1438f86e40fSKarl Rupp Level: beginner 1448f86e40fSKarl Rupp 1458f86e40fSKarl Rupp .seealso: MatCreateAIJViennaCL(), MATSEQAIJVIENNACL, MatCreateSeqAIJVIENNACL() 1468f86e40fSKarl Rupp M*/ 147