1 #include <petscconf.h> 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 4 PetscErrorCode MatAssemblyEnd_MPIAIJKokkos(Mat A,MatAssemblyType mode) 5 { 6 PetscErrorCode ierr; 7 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data; 8 9 PetscFunctionBegin; 10 ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr); 11 if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 12 ierr = VecSetType(mpiaij->lvec,VECSEQKOKKOS);CHKERRQ(ierr); 13 } 14 PetscFunctionReturn(0); 15 } 16 17 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJKokkos(Mat mat,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 18 { 19 PetscErrorCode ierr; 20 PetscInt i; 21 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 22 23 24 PetscFunctionBegin; 25 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 26 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 27 if (d_nnz) { 28 for (i=0; i<mat->rmap->n; i++) { 29 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 30 } 31 } 32 if (o_nnz) { 33 for (i=0; i<mat->rmap->n; i++) { 34 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 35 } 36 } 37 if (!mat->preallocated) { 38 /* Explicitly create 2 MATSEQAIJKOKKOS matrices. */ 39 ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr); 40 ierr = MatSetSizes(mpiaij->A,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr); 41 ierr = MatSetType(mpiaij->A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 42 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->A);CHKERRQ(ierr); 43 ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr); 44 ierr = MatSetSizes(mpiaij->B,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 45 ierr = MatSetType(mpiaij->B,MATSEQAIJKOKKOS);CHKERRQ(ierr); 46 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->B);CHKERRQ(ierr); 47 } 48 ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr); 49 ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr); 50 51 mat->preallocated = PETSC_TRUE; 52 PetscFunctionReturn(0); 53 } 54 55 PetscErrorCode MatMult_MPIAIJKokkos(Mat mat,Vec xx,Vec yy) 56 { 57 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 58 PetscErrorCode ierr; 59 PetscInt nt; 60 61 PetscFunctionBegin; 62 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 63 if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt); 64 ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65 ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr); 66 ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 67 ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr); 68 PetscFunctionReturn(0); 69 } 70 71 PetscErrorCode MatMultAdd_MPIAIJKokkos(Mat mat,Vec xx,Vec yy,Vec zz) 72 { 73 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 74 PetscErrorCode ierr; 75 PetscInt nt; 76 77 PetscFunctionBegin; 78 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 79 if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt); 80 ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81 ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr); 82 ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83 ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr); 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatMultTranspose_MPIAIJKokkos(Mat mat,Vec xx,Vec yy) 88 { 89 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 90 PetscErrorCode ierr; 91 PetscInt nt; 92 93 PetscFunctionBegin; 94 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 95 if (nt != mat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->rmap->n,nt); 96 ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr); 97 ierr = (*mpiaij->A->ops->multtranspose)(mpiaij->A,xx,yy);CHKERRQ(ierr); 98 ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 99 ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat) 104 { 105 PetscErrorCode ierr; 106 Mat B; 107 108 PetscFunctionBegin; 109 if (reuse == MAT_INITIAL_MATRIX) { 110 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 111 } else if (reuse == MAT_REUSE_MATRIX) { 112 ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 113 } 114 B = *newmat; 115 116 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 117 ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr); 118 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJKOKKOS);CHKERRQ(ierr); 119 120 B->ops->assemblyend = MatAssemblyEnd_MPIAIJKokkos; 121 B->ops->mult = MatMult_MPIAIJKokkos; 122 B->ops->multadd = MatMultAdd_MPIAIJKokkos; 123 B->ops->multtranspose = MatMultTranspose_MPIAIJKokkos; 124 125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJKokkos);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJKokkos(Mat A) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 135 ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr); 136 ierr = MatConvert_MPIAIJ_MPIAIJKokkos(A,MATMPIAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 /*@C 141 MatCreateAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format 142 (the default parallel PETSc format). This matrix will ultimately pushed down 143 to Kokkos for calculations. For good matrix 144 assembly performance the user should preallocate the matrix storage by setting 145 the parameter nz (or the array nnz). By setting these parameters accurately, 146 performance during matrix assembly can be increased by more than a factor of 50. 147 148 Collective 149 150 Input Parameters: 151 + comm - MPI communicator, set to PETSC_COMM_SELF 152 . m - number of rows 153 . n - number of columns 154 . nz - number of nonzeros per row (same for all rows) 155 - nnz - array containing the number of nonzeros in the various rows 156 (possibly different for each row) or NULL 157 158 Output Parameter: 159 . A - the matrix 160 161 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 162 MatXXXXSetPreallocation() paradigm instead of this routine directly. 163 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 164 165 Notes: 166 If nnz is given then nz is ignored 167 168 The AIJ format (also called the Yale sparse matrix format or 169 compressed row storage), is fully compatible with standard Fortran 77 170 storage. That is, the stored row and column indices can begin at 171 either one (as in Fortran) or zero. See the users' manual for details. 172 173 Specify the preallocated storage with either nz or nnz (not both). 174 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 175 allocation. For large problems you MUST preallocate memory or you 176 will get TERRIBLE performance, see the users' manual chapter on matrices. 177 178 By default, this format uses inodes (identical nodes) when possible, to 179 improve numerical efficiency of matrix-vector products and solves. We 180 search for consecutive rows with the same nonzero structure, thereby 181 reusing matrix information to achieve increased efficiency. 182 183 Level: intermediate 184 185 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJKOKKOS, MATAIJKokkos 186 @*/ 187 PetscErrorCode MatCreateAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 188 { 189 PetscErrorCode ierr; 190 PetscMPIInt size; 191 192 PetscFunctionBegin; 193 ierr = MatCreate(comm,A);CHKERRQ(ierr); 194 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 195 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 196 if (size > 1) { 197 ierr = MatSetType(*A,MATMPIAIJKOKKOS);CHKERRQ(ierr); 198 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 199 } else { 200 ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 201 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 202 } 203 PetscFunctionReturn(0); 204 } 205 206