1*8c3ff71bSJunchao Zhang #include <petscconf.h> 2*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 3*8c3ff71bSJunchao Zhang 4*8c3ff71bSJunchao Zhang PetscErrorCode MatAssemblyEnd_MPIAIJKokkos(Mat A,MatAssemblyType mode) 5*8c3ff71bSJunchao Zhang { 6*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 7*8c3ff71bSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 8*8c3ff71bSJunchao Zhang 9*8c3ff71bSJunchao Zhang PetscFunctionBegin; 10*8c3ff71bSJunchao Zhang ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr); 11*8c3ff71bSJunchao Zhang if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 12*8c3ff71bSJunchao Zhang ierr = VecSetType(mpiaij->lvec,VECSEQKOKKOS);CHKERRQ(ierr); 13*8c3ff71bSJunchao Zhang } 14*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 15*8c3ff71bSJunchao Zhang } 16*8c3ff71bSJunchao Zhang 17*8c3ff71bSJunchao Zhang PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJKokkos(Mat mat,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 18*8c3ff71bSJunchao Zhang { 19*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 20*8c3ff71bSJunchao Zhang PetscInt i; 21*8c3ff71bSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 22*8c3ff71bSJunchao Zhang 23*8c3ff71bSJunchao Zhang 24*8c3ff71bSJunchao Zhang PetscFunctionBegin; 25*8c3ff71bSJunchao Zhang ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 26*8c3ff71bSJunchao Zhang ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 27*8c3ff71bSJunchao Zhang if (d_nnz) { 28*8c3ff71bSJunchao Zhang for (i=0; i<mat->rmap->n; i++) { 29*8c3ff71bSJunchao Zhang if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 30*8c3ff71bSJunchao Zhang } 31*8c3ff71bSJunchao Zhang } 32*8c3ff71bSJunchao Zhang if (o_nnz) { 33*8c3ff71bSJunchao Zhang for (i=0; i<mat->rmap->n; i++) { 34*8c3ff71bSJunchao Zhang if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 35*8c3ff71bSJunchao Zhang } 36*8c3ff71bSJunchao Zhang } 37*8c3ff71bSJunchao Zhang if (!mat->preallocated) { 38*8c3ff71bSJunchao Zhang /* Explicitly create 2 MATSEQAIJKOKKOS matrices. */ 39*8c3ff71bSJunchao Zhang ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr); 40*8c3ff71bSJunchao Zhang ierr = MatSetSizes(mpiaij->A,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr); 41*8c3ff71bSJunchao Zhang ierr = MatSetType(mpiaij->A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 42*8c3ff71bSJunchao Zhang ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->A);CHKERRQ(ierr); 43*8c3ff71bSJunchao Zhang ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr); 44*8c3ff71bSJunchao Zhang ierr = MatSetSizes(mpiaij->B,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 45*8c3ff71bSJunchao Zhang ierr = MatSetType(mpiaij->B,MATSEQAIJKOKKOS);CHKERRQ(ierr); 46*8c3ff71bSJunchao Zhang ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->B);CHKERRQ(ierr); 47*8c3ff71bSJunchao Zhang } 48*8c3ff71bSJunchao Zhang ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr); 49*8c3ff71bSJunchao Zhang ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr); 50*8c3ff71bSJunchao Zhang 51*8c3ff71bSJunchao Zhang mat->preallocated = PETSC_TRUE; 52*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 53*8c3ff71bSJunchao Zhang } 54*8c3ff71bSJunchao Zhang 55*8c3ff71bSJunchao Zhang PetscErrorCode MatMult_MPIAIJKokkos(Mat mat,Vec xx,Vec yy) 56*8c3ff71bSJunchao Zhang { 57*8c3ff71bSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 58*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 59*8c3ff71bSJunchao Zhang PetscInt nt; 60*8c3ff71bSJunchao Zhang 61*8c3ff71bSJunchao Zhang PetscFunctionBegin; 62*8c3ff71bSJunchao Zhang ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 63*8c3ff71bSJunchao Zhang if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt); 64*8c3ff71bSJunchao Zhang ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65*8c3ff71bSJunchao Zhang ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr); 66*8c3ff71bSJunchao Zhang ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 67*8c3ff71bSJunchao Zhang ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr); 68*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 69*8c3ff71bSJunchao Zhang } 70*8c3ff71bSJunchao Zhang 71*8c3ff71bSJunchao Zhang PetscErrorCode MatMultAdd_MPIAIJKokkos(Mat mat,Vec xx,Vec yy,Vec zz) 72*8c3ff71bSJunchao Zhang { 73*8c3ff71bSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 74*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 75*8c3ff71bSJunchao Zhang PetscInt nt; 76*8c3ff71bSJunchao Zhang 77*8c3ff71bSJunchao Zhang PetscFunctionBegin; 78*8c3ff71bSJunchao Zhang ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 79*8c3ff71bSJunchao Zhang if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt); 80*8c3ff71bSJunchao Zhang ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81*8c3ff71bSJunchao Zhang ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr); 82*8c3ff71bSJunchao Zhang ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83*8c3ff71bSJunchao Zhang ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr); 84*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 85*8c3ff71bSJunchao Zhang } 86*8c3ff71bSJunchao Zhang 87*8c3ff71bSJunchao Zhang PetscErrorCode MatMultTranspose_MPIAIJKokkos(Mat mat,Vec xx,Vec yy) 88*8c3ff71bSJunchao Zhang { 89*8c3ff71bSJunchao Zhang Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 90*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 91*8c3ff71bSJunchao Zhang PetscInt nt; 92*8c3ff71bSJunchao Zhang 93*8c3ff71bSJunchao Zhang PetscFunctionBegin; 94*8c3ff71bSJunchao Zhang ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 95*8c3ff71bSJunchao Zhang if (nt != mat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->rmap->n,nt); 96*8c3ff71bSJunchao Zhang ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr); 97*8c3ff71bSJunchao Zhang ierr = (*mpiaij->A->ops->multtranspose)(mpiaij->A,xx,yy);CHKERRQ(ierr); 98*8c3ff71bSJunchao Zhang ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 99*8c3ff71bSJunchao Zhang ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 100*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 101*8c3ff71bSJunchao Zhang } 102*8c3ff71bSJunchao Zhang 103*8c3ff71bSJunchao Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat) 104*8c3ff71bSJunchao Zhang { 105*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 106*8c3ff71bSJunchao Zhang Mat B; 107*8c3ff71bSJunchao Zhang 108*8c3ff71bSJunchao Zhang PetscFunctionBegin; 109*8c3ff71bSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { 110*8c3ff71bSJunchao Zhang ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 111*8c3ff71bSJunchao Zhang } else if (reuse == MAT_REUSE_MATRIX) { 112*8c3ff71bSJunchao Zhang ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 113*8c3ff71bSJunchao Zhang } 114*8c3ff71bSJunchao Zhang B = *newmat; 115*8c3ff71bSJunchao Zhang 116*8c3ff71bSJunchao Zhang ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 117*8c3ff71bSJunchao Zhang ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr); 118*8c3ff71bSJunchao Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJKOKKOS);CHKERRQ(ierr); 119*8c3ff71bSJunchao Zhang 120*8c3ff71bSJunchao Zhang B->ops->assemblyend = MatAssemblyEnd_MPIAIJKokkos; 121*8c3ff71bSJunchao Zhang B->ops->mult = MatMult_MPIAIJKokkos; 122*8c3ff71bSJunchao Zhang B->ops->multadd = MatMultAdd_MPIAIJKokkos; 123*8c3ff71bSJunchao Zhang B->ops->multtranspose = MatMultTranspose_MPIAIJKokkos; 124*8c3ff71bSJunchao Zhang 125*8c3ff71bSJunchao Zhang ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJKokkos);CHKERRQ(ierr); 126*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 127*8c3ff71bSJunchao Zhang } 128*8c3ff71bSJunchao Zhang 129*8c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJKokkos(Mat A) 130*8c3ff71bSJunchao Zhang { 131*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 132*8c3ff71bSJunchao Zhang 133*8c3ff71bSJunchao Zhang PetscFunctionBegin; 134*8c3ff71bSJunchao Zhang ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 135*8c3ff71bSJunchao Zhang ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr); 136*8c3ff71bSJunchao Zhang ierr = MatConvert_MPIAIJ_MPIAIJKokkos(A,MATMPIAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 137*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 138*8c3ff71bSJunchao Zhang } 139*8c3ff71bSJunchao Zhang 140*8c3ff71bSJunchao Zhang /*@C 141*8c3ff71bSJunchao Zhang MatCreateAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format 142*8c3ff71bSJunchao Zhang (the default parallel PETSc format). This matrix will ultimately pushed down 143*8c3ff71bSJunchao Zhang to Kokkos for calculations. For good matrix 144*8c3ff71bSJunchao Zhang assembly performance the user should preallocate the matrix storage by setting 145*8c3ff71bSJunchao Zhang the parameter nz (or the array nnz). By setting these parameters accurately, 146*8c3ff71bSJunchao Zhang performance during matrix assembly can be increased by more than a factor of 50. 147*8c3ff71bSJunchao Zhang 148*8c3ff71bSJunchao Zhang Collective 149*8c3ff71bSJunchao Zhang 150*8c3ff71bSJunchao Zhang Input Parameters: 151*8c3ff71bSJunchao Zhang + comm - MPI communicator, set to PETSC_COMM_SELF 152*8c3ff71bSJunchao Zhang . m - number of rows 153*8c3ff71bSJunchao Zhang . n - number of columns 154*8c3ff71bSJunchao Zhang . nz - number of nonzeros per row (same for all rows) 155*8c3ff71bSJunchao Zhang - nnz - array containing the number of nonzeros in the various rows 156*8c3ff71bSJunchao Zhang (possibly different for each row) or NULL 157*8c3ff71bSJunchao Zhang 158*8c3ff71bSJunchao Zhang Output Parameter: 159*8c3ff71bSJunchao Zhang . A - the matrix 160*8c3ff71bSJunchao Zhang 161*8c3ff71bSJunchao Zhang It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 162*8c3ff71bSJunchao Zhang MatXXXXSetPreallocation() paradigm instead of this routine directly. 163*8c3ff71bSJunchao Zhang [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 164*8c3ff71bSJunchao Zhang 165*8c3ff71bSJunchao Zhang Notes: 166*8c3ff71bSJunchao Zhang If nnz is given then nz is ignored 167*8c3ff71bSJunchao Zhang 168*8c3ff71bSJunchao Zhang The AIJ format (also called the Yale sparse matrix format or 169*8c3ff71bSJunchao Zhang compressed row storage), is fully compatible with standard Fortran 77 170*8c3ff71bSJunchao Zhang storage. That is, the stored row and column indices can begin at 171*8c3ff71bSJunchao Zhang either one (as in Fortran) or zero. See the users' manual for details. 172*8c3ff71bSJunchao Zhang 173*8c3ff71bSJunchao Zhang Specify the preallocated storage with either nz or nnz (not both). 174*8c3ff71bSJunchao Zhang Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 175*8c3ff71bSJunchao Zhang allocation. For large problems you MUST preallocate memory or you 176*8c3ff71bSJunchao Zhang will get TERRIBLE performance, see the users' manual chapter on matrices. 177*8c3ff71bSJunchao Zhang 178*8c3ff71bSJunchao Zhang By default, this format uses inodes (identical nodes) when possible, to 179*8c3ff71bSJunchao Zhang improve numerical efficiency of matrix-vector products and solves. We 180*8c3ff71bSJunchao Zhang search for consecutive rows with the same nonzero structure, thereby 181*8c3ff71bSJunchao Zhang reusing matrix information to achieve increased efficiency. 182*8c3ff71bSJunchao Zhang 183*8c3ff71bSJunchao Zhang Level: intermediate 184*8c3ff71bSJunchao Zhang 185*8c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJKOKKOS, MATAIJKokkos 186*8c3ff71bSJunchao Zhang @*/ 187*8c3ff71bSJunchao Zhang PetscErrorCode MatCreateAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 188*8c3ff71bSJunchao Zhang { 189*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 190*8c3ff71bSJunchao Zhang PetscMPIInt size; 191*8c3ff71bSJunchao Zhang 192*8c3ff71bSJunchao Zhang PetscFunctionBegin; 193*8c3ff71bSJunchao Zhang ierr = MatCreate(comm,A);CHKERRQ(ierr); 194*8c3ff71bSJunchao Zhang ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 195*8c3ff71bSJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196*8c3ff71bSJunchao Zhang if (size > 1) { 197*8c3ff71bSJunchao Zhang ierr = MatSetType(*A,MATMPIAIJKOKKOS);CHKERRQ(ierr); 198*8c3ff71bSJunchao Zhang ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 199*8c3ff71bSJunchao Zhang } else { 200*8c3ff71bSJunchao Zhang ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 201*8c3ff71bSJunchao Zhang ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 202*8c3ff71bSJunchao Zhang } 203*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 204*8c3ff71bSJunchao Zhang } 205*8c3ff71bSJunchao Zhang 206