1*8f86e40fSKarl Rupp #include "petscconf.h" 2*8f86e40fSKarl Rupp #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3*8f86e40fSKarl Rupp 4*8f86e40fSKarl Rupp #undef __FUNCT__ 5*8f86e40fSKarl Rupp #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJViennaCL" 6*8f86e40fSKarl Rupp PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJViennaCL(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 7*8f86e40fSKarl Rupp { 8*8f86e40fSKarl Rupp Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 9*8f86e40fSKarl Rupp PetscErrorCode ierr; 10*8f86e40fSKarl Rupp PetscInt i; 11*8f86e40fSKarl Rupp 12*8f86e40fSKarl Rupp PetscFunctionBegin; 13*8f86e40fSKarl Rupp if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 14*8f86e40fSKarl Rupp if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 15*8f86e40fSKarl Rupp if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 16*8f86e40fSKarl Rupp if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 17*8f86e40fSKarl Rupp 18*8f86e40fSKarl Rupp ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 19*8f86e40fSKarl Rupp ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 20*8f86e40fSKarl Rupp if (d_nnz) { 21*8f86e40fSKarl Rupp for (i=0; i<B->rmap->n; i++) { 22*8f86e40fSKarl Rupp if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 23*8f86e40fSKarl Rupp } 24*8f86e40fSKarl Rupp } 25*8f86e40fSKarl Rupp if (o_nnz) { 26*8f86e40fSKarl Rupp for (i=0; i<B->rmap->n; i++) { 27*8f86e40fSKarl Rupp if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 28*8f86e40fSKarl Rupp } 29*8f86e40fSKarl Rupp } 30*8f86e40fSKarl Rupp if (!B->preallocated) { 31*8f86e40fSKarl Rupp /* Explicitly create the two MATSEQAIJVIENNACL matrices. */ 32*8f86e40fSKarl Rupp ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 33*8f86e40fSKarl Rupp ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 34*8f86e40fSKarl Rupp ierr = MatSetType(b->A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 35*8f86e40fSKarl Rupp ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 36*8f86e40fSKarl Rupp ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 37*8f86e40fSKarl Rupp ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 38*8f86e40fSKarl Rupp ierr = MatSetType(b->B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 39*8f86e40fSKarl Rupp ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 40*8f86e40fSKarl Rupp } 41*8f86e40fSKarl Rupp ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 42*8f86e40fSKarl Rupp ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 43*8f86e40fSKarl Rupp B->preallocated = PETSC_TRUE; 44*8f86e40fSKarl Rupp PetscFunctionReturn(0); 45*8f86e40fSKarl Rupp } 46*8f86e40fSKarl Rupp 47*8f86e40fSKarl Rupp #undef __FUNCT__ 48*8f86e40fSKarl Rupp #define __FUNCT__ "MatGetVecs_MPIAIJViennaCL" 49*8f86e40fSKarl Rupp PetscErrorCode MatGetVecs_MPIAIJViennaCL(Mat mat,Vec *right,Vec *left) 50*8f86e40fSKarl Rupp { 51*8f86e40fSKarl Rupp PetscErrorCode ierr; 52*8f86e40fSKarl Rupp 53*8f86e40fSKarl Rupp PetscFunctionBegin; 54*8f86e40fSKarl Rupp if (right) { 55*8f86e40fSKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr); 56*8f86e40fSKarl Rupp ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 57*8f86e40fSKarl Rupp ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr); 58*8f86e40fSKarl Rupp ierr = VecSetType(*right,VECVIENNACL);CHKERRQ(ierr); 59*8f86e40fSKarl Rupp ierr = VecSetLayout(*right,mat->cmap);CHKERRQ(ierr); 60*8f86e40fSKarl Rupp } 61*8f86e40fSKarl Rupp if (left) { 62*8f86e40fSKarl Rupp ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr); 63*8f86e40fSKarl Rupp ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 64*8f86e40fSKarl Rupp ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr); 65*8f86e40fSKarl Rupp ierr = VecSetType(*left,VECVIENNACL);CHKERRQ(ierr); 66*8f86e40fSKarl Rupp ierr = VecSetLayout(*left,mat->rmap);CHKERRQ(ierr); 67*8f86e40fSKarl Rupp } 68*8f86e40fSKarl Rupp PetscFunctionReturn(0); 69*8f86e40fSKarl Rupp } 70*8f86e40fSKarl Rupp 71*8f86e40fSKarl Rupp 72*8f86e40fSKarl Rupp #undef __FUNCT__ 73*8f86e40fSKarl Rupp #define __FUNCT__ "MatDestroy_MPIAIJViennaCL" 74*8f86e40fSKarl Rupp PetscErrorCode MatDestroy_MPIAIJViennaCL(Mat A) 75*8f86e40fSKarl Rupp { 76*8f86e40fSKarl Rupp PetscErrorCode ierr; 77*8f86e40fSKarl Rupp 78*8f86e40fSKarl Rupp PetscFunctionBegin; 79*8f86e40fSKarl Rupp ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 80*8f86e40fSKarl Rupp PetscFunctionReturn(0); 81*8f86e40fSKarl Rupp } 82*8f86e40fSKarl Rupp 83*8f86e40fSKarl Rupp #undef __FUNCT__ 84*8f86e40fSKarl Rupp #define __FUNCT__ "MatCreate_MPIAIJViennaCL" 85*8f86e40fSKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJViennaCL(Mat A) 86*8f86e40fSKarl Rupp { 87*8f86e40fSKarl Rupp PetscErrorCode ierr; 88*8f86e40fSKarl Rupp 89*8f86e40fSKarl Rupp PetscFunctionBegin; 90*8f86e40fSKarl Rupp ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr); 91*8f86e40fSKarl Rupp ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJViennaCL",MatMPIAIJSetPreallocation_MPIAIJViennaCL);CHKERRQ(ierr); 92*8f86e40fSKarl Rupp A->ops->getvecs = MatGetVecs_MPIAIJViennaCL; 93*8f86e40fSKarl Rupp 94*8f86e40fSKarl Rupp ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJVIENNACL);CHKERRQ(ierr); 95*8f86e40fSKarl Rupp PetscFunctionReturn(0); 96*8f86e40fSKarl Rupp } 97*8f86e40fSKarl Rupp 98*8f86e40fSKarl Rupp 99*8f86e40fSKarl Rupp /*@ 100*8f86e40fSKarl Rupp MatCreateAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 101*8f86e40fSKarl Rupp (the default parallel PETSc format). This matrix will ultimately pushed down 102*8f86e40fSKarl Rupp to GPUs and use the ViennaCL library for calculations. For good matrix 103*8f86e40fSKarl Rupp assembly performance the user should preallocate the matrix storage by setting 104*8f86e40fSKarl Rupp the parameter nz (or the array nnz). By setting these parameters accurately, 105*8f86e40fSKarl Rupp performance during matrix assembly can be increased substantially. 106*8f86e40fSKarl Rupp 107*8f86e40fSKarl Rupp 108*8f86e40fSKarl Rupp Collective on MPI_Comm 109*8f86e40fSKarl Rupp 110*8f86e40fSKarl Rupp Input Parameters: 111*8f86e40fSKarl Rupp + comm - MPI communicator, set to PETSC_COMM_SELF 112*8f86e40fSKarl Rupp . m - number of rows 113*8f86e40fSKarl Rupp . n - number of columns 114*8f86e40fSKarl Rupp . nz - number of nonzeros per row (same for all rows) 115*8f86e40fSKarl Rupp - nnz - array containing the number of nonzeros in the various rows 116*8f86e40fSKarl Rupp (possibly different for each row) or NULL 117*8f86e40fSKarl Rupp 118*8f86e40fSKarl Rupp Output Parameter: 119*8f86e40fSKarl Rupp . A - the matrix 120*8f86e40fSKarl Rupp 121*8f86e40fSKarl Rupp It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 122*8f86e40fSKarl Rupp MatXXXXSetPreallocation() paradigm instead of this routine directly. 123*8f86e40fSKarl Rupp [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 124*8f86e40fSKarl Rupp 125*8f86e40fSKarl Rupp Notes: 126*8f86e40fSKarl Rupp If nnz is given then nz is ignored 127*8f86e40fSKarl Rupp 128*8f86e40fSKarl Rupp The AIJ format (also called the Yale sparse matrix format or 129*8f86e40fSKarl Rupp compressed row storage), is fully compatible with standard Fortran 77 130*8f86e40fSKarl Rupp storage. That is, the stored row and column indices can begin at 131*8f86e40fSKarl Rupp either one (as in Fortran) or zero. See the users' manual for details. 132*8f86e40fSKarl Rupp 133*8f86e40fSKarl Rupp Specify the preallocated storage with either nz or nnz (not both). 134*8f86e40fSKarl Rupp Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 135*8f86e40fSKarl Rupp allocation. For large problems you MUST preallocate memory or you 136*8f86e40fSKarl Rupp will get TERRIBLE performance, see the users' manual chapter on matrices. 137*8f86e40fSKarl Rupp 138*8f86e40fSKarl Rupp By default, this format uses inodes (identical nodes) when possible, to 139*8f86e40fSKarl Rupp improve numerical efficiency of matrix-vector products and solves. We 140*8f86e40fSKarl Rupp search for consecutive rows with the same nonzero structure, thereby 141*8f86e40fSKarl Rupp reusing matrix information to achieve increased efficiency. 142*8f86e40fSKarl Rupp 143*8f86e40fSKarl Rupp Level: intermediate 144*8f86e40fSKarl Rupp 145*8f86e40fSKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJVIENNACL, MATAIJVIENNACL 146*8f86e40fSKarl Rupp @*/ 147*8f86e40fSKarl Rupp #undef __FUNCT__ 148*8f86e40fSKarl Rupp #define __FUNCT__ "MatCreateAIJViennaCL" 149*8f86e40fSKarl Rupp PetscErrorCode MatCreateAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 150*8f86e40fSKarl Rupp { 151*8f86e40fSKarl Rupp PetscErrorCode ierr; 152*8f86e40fSKarl Rupp PetscMPIInt size; 153*8f86e40fSKarl Rupp 154*8f86e40fSKarl Rupp PetscFunctionBegin; 155*8f86e40fSKarl Rupp ierr = MatCreate(comm,A);CHKERRQ(ierr); 156*8f86e40fSKarl Rupp ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 157*8f86e40fSKarl Rupp ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 158*8f86e40fSKarl Rupp if (size > 1) { 159*8f86e40fSKarl Rupp ierr = MatSetType(*A,MATMPIAIJVIENNACL);CHKERRQ(ierr); 160*8f86e40fSKarl Rupp ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 161*8f86e40fSKarl Rupp } else { 162*8f86e40fSKarl Rupp ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 163*8f86e40fSKarl Rupp ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 164*8f86e40fSKarl Rupp } 165*8f86e40fSKarl Rupp PetscFunctionReturn(0); 166*8f86e40fSKarl Rupp } 167*8f86e40fSKarl Rupp 168*8f86e40fSKarl Rupp /*M 169*8f86e40fSKarl Rupp MATAIJVIENNACL - MATMPIAIJVIENNACL= "aijviennacl" = "mpiaijviennacl" - A matrix type to be used for sparse matrices. 170*8f86e40fSKarl Rupp 171*8f86e40fSKarl Rupp A matrix type (CSR format) whose data resides on GPUs. 172*8f86e40fSKarl Rupp All matrix calculations are performed using the ViennaCL library. 173*8f86e40fSKarl Rupp 174*8f86e40fSKarl Rupp This matrix type is identical to MATSEQAIJVIENNACL when constructed with a single process communicator, 175*8f86e40fSKarl Rupp and MATMPIAIJVIENNACL otherwise. As a result, for single process communicators, 176*8f86e40fSKarl Rupp MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 177*8f86e40fSKarl Rupp for communicators controlling multiple processes. It is recommended that you call both of 178*8f86e40fSKarl Rupp the above preallocation routines for simplicity. 179*8f86e40fSKarl Rupp 180*8f86e40fSKarl Rupp Options Database Keys: 181*8f86e40fSKarl Rupp + -mat_type mpiaijviennacl - sets the matrix type to "mpiaijviennacl" during a call to MatSetFromOptions() 182*8f86e40fSKarl Rupp 183*8f86e40fSKarl Rupp Level: beginner 184*8f86e40fSKarl Rupp 185*8f86e40fSKarl Rupp .seealso: MatCreateAIJViennaCL(), MATSEQAIJVIENNACL, MatCreateSeqAIJVIENNACL() 186*8f86e40fSKarl Rupp M*/ 187*8f86e40fSKarl Rupp 188