1*8c3ff71bSJunchao Zhang #include "petsc/private/petscimpl.h" 2*8c3ff71bSJunchao Zhang #include <petscsystypes.h> 3*8c3ff71bSJunchao Zhang #include <petscerror.h> 4*8c3ff71bSJunchao Zhang #include <petscvec.hpp> 5*8c3ff71bSJunchao Zhang 6*8c3ff71bSJunchao Zhang #include <Kokkos_Core.hpp> 7*8c3ff71bSJunchao Zhang #include <KokkosSparse_CrsMatrix.hpp> 8*8c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp> 9*8c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp> 10*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/aij.h> 11*8c3ff71bSJunchao Zhang 12*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp> 13*8c3ff71bSJunchao Zhang 14*8c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */ 15*8c3ff71bSJunchao Zhang 16*8c3ff71bSJunchao Zhang static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode) 17*8c3ff71bSJunchao Zhang { 18*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 19*8c3ff71bSJunchao Zhang 20*8c3ff71bSJunchao Zhang PetscFunctionBegin; 21*8c3ff71bSJunchao Zhang ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 22*8c3ff71bSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 23*8c3ff71bSJunchao Zhang /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */ 24*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 25*8c3ff71bSJunchao Zhang } 26*8c3ff71bSJunchao Zhang 27*8c3ff71bSJunchao Zhang static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A) 28*8c3ff71bSJunchao Zhang { 29*8c3ff71bSJunchao Zhang Mat_SeqAIJ *aijseq = (Mat_SeqAIJ*)A->data; 30*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 31*8c3ff71bSJunchao Zhang PetscInt nrows = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz; 32*8c3ff71bSJunchao Zhang 33*8c3ff71bSJunchao Zhang PetscFunctionBegin; 34*8c3ff71bSJunchao Zhang /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */ 35*8c3ff71bSJunchao Zhang if (!aijkok || aijkok->nonzerostate != A->nonzerostate) { 36*8c3ff71bSJunchao Zhang delete aijkok; 37*8c3ff71bSJunchao Zhang aijkok = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a); 38*8c3ff71bSJunchao Zhang aijkok->nonzerostate = A->nonzerostate; 39*8c3ff71bSJunchao Zhang A->spptr = aijkok; 40*8c3ff71bSJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */ 41*8c3ff71bSJunchao Zhang Kokkos::deep_copy(aijkok->a_d,aijkok->a_h); 42*8c3ff71bSJunchao Zhang } 43*8c3ff71bSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 44*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 45*8c3ff71bSJunchao Zhang } 46*8c3ff71bSJunchao Zhang 47*8c3ff71bSJunchao Zhang /* y = A x */ 48*8c3ff71bSJunchao Zhang static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 49*8c3ff71bSJunchao Zhang { 50*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 51*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 52*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 53*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 54*8c3ff71bSJunchao Zhang 55*8c3ff71bSJunchao Zhang PetscFunctionBegin; 56*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 57*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 58*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 59*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 60*8c3ff71bSJunchao Zhang KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */ 61*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 62*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 63*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 64*8c3ff71bSJunchao Zhang } 65*8c3ff71bSJunchao Zhang 66*8c3ff71bSJunchao Zhang /* y = A^T x */ 67*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 68*8c3ff71bSJunchao Zhang { 69*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 70*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 71*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 72*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 73*8c3ff71bSJunchao Zhang 74*8c3ff71bSJunchao Zhang PetscFunctionBegin; 75*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 76*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 77*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 78*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 79*8c3ff71bSJunchao Zhang KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */ 80*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 81*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 82*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 83*8c3ff71bSJunchao Zhang } 84*8c3ff71bSJunchao Zhang 85*8c3ff71bSJunchao Zhang /* y = A^H x */ 86*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 87*8c3ff71bSJunchao Zhang { 88*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 89*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 90*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 91*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 92*8c3ff71bSJunchao Zhang 93*8c3ff71bSJunchao Zhang PetscFunctionBegin; 94*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 95*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 96*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 97*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 98*8c3ff71bSJunchao Zhang KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */ 99*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 100*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 101*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 102*8c3ff71bSJunchao Zhang } 103*8c3ff71bSJunchao Zhang 104*8c3ff71bSJunchao Zhang /* z = A x + y */ 105*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz) 106*8c3ff71bSJunchao Zhang { 107*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 108*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 109*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 110*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 111*8c3ff71bSJunchao Zhang 112*8c3ff71bSJunchao Zhang PetscFunctionBegin; 113*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 114*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 115*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 116*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 117*8c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 118*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 119*8c3ff71bSJunchao Zhang KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */ 120*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 121*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 122*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 123*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 124*8c3ff71bSJunchao Zhang } 125*8c3ff71bSJunchao Zhang 126*8c3ff71bSJunchao Zhang /* z = A^T x + y */ 127*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 128*8c3ff71bSJunchao Zhang { 129*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 130*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 131*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 132*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 133*8c3ff71bSJunchao Zhang 134*8c3ff71bSJunchao Zhang PetscFunctionBegin; 135*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 136*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 137*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 138*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 139*8c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 140*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 141*8c3ff71bSJunchao Zhang KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */ 142*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 143*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 144*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 145*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 146*8c3ff71bSJunchao Zhang } 147*8c3ff71bSJunchao Zhang 148*8c3ff71bSJunchao Zhang /* z = A^H x + y */ 149*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 150*8c3ff71bSJunchao Zhang { 151*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 152*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 153*8c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 154*8c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 155*8c3ff71bSJunchao Zhang 156*8c3ff71bSJunchao Zhang PetscFunctionBegin; 157*8c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 158*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 159*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 160*8c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 161*8c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 162*8c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 163*8c3ff71bSJunchao Zhang KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */ 164*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 165*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 166*8c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 167*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 168*8c3ff71bSJunchao Zhang } 169*8c3ff71bSJunchao Zhang 170*8c3ff71bSJunchao Zhang PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat) 171*8c3ff71bSJunchao Zhang { 172*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 173*8c3ff71bSJunchao Zhang Mat B; 174*8c3ff71bSJunchao Zhang 175*8c3ff71bSJunchao Zhang PetscFunctionBegin; 176*8c3ff71bSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */ 177*8c3ff71bSJunchao Zhang ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 178*8c3ff71bSJunchao Zhang } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */ 179*8c3ff71bSJunchao Zhang ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 180*8c3ff71bSJunchao Zhang } 181*8c3ff71bSJunchao Zhang 182*8c3ff71bSJunchao Zhang B = *newmat; 183*8c3ff71bSJunchao Zhang ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 184*8c3ff71bSJunchao Zhang ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr); 185*8c3ff71bSJunchao Zhang ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr); 186*8c3ff71bSJunchao Zhang ierr = MatSetOps_SeqAIJKokkos(A);CHKERRQ(ierr); 187*8c3ff71bSJunchao Zhang 188*8c3ff71bSJunchao Zhang B->offloadmask = PETSC_OFFLOAD_CPU; 189*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 190*8c3ff71bSJunchao Zhang } 191*8c3ff71bSJunchao Zhang 192*8c3ff71bSJunchao Zhang static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B) 193*8c3ff71bSJunchao Zhang { 194*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 195*8c3ff71bSJunchao Zhang 196*8c3ff71bSJunchao Zhang PetscFunctionBegin; 197*8c3ff71bSJunchao Zhang ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 198*8c3ff71bSJunchao Zhang ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr); 199*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 200*8c3ff71bSJunchao Zhang } 201*8c3ff71bSJunchao Zhang 202*8c3ff71bSJunchao Zhang static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A) 203*8c3ff71bSJunchao Zhang { 204*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 205*8c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 206*8c3ff71bSJunchao Zhang 207*8c3ff71bSJunchao Zhang PetscFunctionBegin; 208*8c3ff71bSJunchao Zhang delete aijkok; 209*8c3ff71bSJunchao Zhang ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 210*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 211*8c3ff71bSJunchao Zhang } 212*8c3ff71bSJunchao Zhang 213*8c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A) 214*8c3ff71bSJunchao Zhang { 215*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 216*8c3ff71bSJunchao Zhang 217*8c3ff71bSJunchao Zhang PetscFunctionBegin; 218*8c3ff71bSJunchao Zhang ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 219*8c3ff71bSJunchao Zhang ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr); 220*8c3ff71bSJunchao Zhang ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 221*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 222*8c3ff71bSJunchao Zhang } 223*8c3ff71bSJunchao Zhang 224*8c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A) 225*8c3ff71bSJunchao Zhang { 226*8c3ff71bSJunchao Zhang PetscFunctionBegin; 227*8c3ff71bSJunchao Zhang A->ops->assemblyend = MatAssemblyEnd_SeqAIJKokkos; 228*8c3ff71bSJunchao Zhang A->ops->destroy = MatDestroy_SeqAIJKokkos; 229*8c3ff71bSJunchao Zhang A->ops->duplicate = MatDuplicate_SeqAIJKokkos; 230*8c3ff71bSJunchao Zhang 231*8c3ff71bSJunchao Zhang A->ops->mult = MatMult_SeqAIJKokkos; 232*8c3ff71bSJunchao Zhang A->ops->multadd = MatMultAdd_SeqAIJKokkos; 233*8c3ff71bSJunchao Zhang A->ops->multtranspose = MatMultTranspose_SeqAIJKokkos; 234*8c3ff71bSJunchao Zhang A->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJKokkos; 235*8c3ff71bSJunchao Zhang A->ops->multhermitiantranspose = MatMultHermitianTranspose_SeqAIJKokkos; 236*8c3ff71bSJunchao Zhang A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos; 237*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 238*8c3ff71bSJunchao Zhang } 239*8c3ff71bSJunchao Zhang 240*8c3ff71bSJunchao Zhang /* --------------------------------------------------------------------------------*/ 241*8c3ff71bSJunchao Zhang /*C 242*8c3ff71bSJunchao Zhang MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format 243*8c3ff71bSJunchao Zhang (the default parallel PETSc format). This matrix will ultimately be handled by 244*8c3ff71bSJunchao Zhang Kokkos for calculations. For good matrix 245*8c3ff71bSJunchao Zhang assembly performance the user should preallocate the matrix storage by setting 246*8c3ff71bSJunchao Zhang the parameter nz (or the array nnz). By setting these parameters accurately, 247*8c3ff71bSJunchao Zhang performance during matrix assembly can be increased by more than a factor of 50. 248*8c3ff71bSJunchao Zhang 249*8c3ff71bSJunchao Zhang Collective 250*8c3ff71bSJunchao Zhang 251*8c3ff71bSJunchao Zhang Input Parameters: 252*8c3ff71bSJunchao Zhang + comm - MPI communicator, set to PETSC_COMM_SELF 253*8c3ff71bSJunchao Zhang . m - number of rows 254*8c3ff71bSJunchao Zhang . n - number of columns 255*8c3ff71bSJunchao Zhang . nz - number of nonzeros per row (same for all rows) 256*8c3ff71bSJunchao Zhang - nnz - array containing the number of nonzeros in the various rows 257*8c3ff71bSJunchao Zhang (possibly different for each row) or NULL 258*8c3ff71bSJunchao Zhang 259*8c3ff71bSJunchao Zhang Output Parameter: 260*8c3ff71bSJunchao Zhang . A - the matrix 261*8c3ff71bSJunchao Zhang 262*8c3ff71bSJunchao Zhang It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 263*8c3ff71bSJunchao Zhang MatXXXXSetPreallocation() paradgm instead of this routine directly. 264*8c3ff71bSJunchao Zhang [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 265*8c3ff71bSJunchao Zhang 266*8c3ff71bSJunchao Zhang Notes: 267*8c3ff71bSJunchao Zhang If nnz is given then nz is ignored 268*8c3ff71bSJunchao Zhang 269*8c3ff71bSJunchao Zhang The AIJ format (also called the Yale sparse matrix format or 270*8c3ff71bSJunchao Zhang compressed row storage), is fully compatible with standard Fortran 77 271*8c3ff71bSJunchao Zhang storage. That is, the stored row and column indices can begin at 272*8c3ff71bSJunchao Zhang either one (as in Fortran) or zero. See the users' manual for details. 273*8c3ff71bSJunchao Zhang 274*8c3ff71bSJunchao Zhang Specify the preallocated storage with either nz or nnz (not both). 275*8c3ff71bSJunchao Zhang Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 276*8c3ff71bSJunchao Zhang allocation. For large problems you MUST preallocate memory or you 277*8c3ff71bSJunchao Zhang will get TERRIBLE performance, see the users' manual chapter on matrices. 278*8c3ff71bSJunchao Zhang 279*8c3ff71bSJunchao Zhang By default, this format uses inodes (identical nodes) when possible, to 280*8c3ff71bSJunchao Zhang improve numerical efficiency of matrix-vector products and solves. We 281*8c3ff71bSJunchao Zhang search for consecutive rows with the same nonzero structure, thereby 282*8c3ff71bSJunchao Zhang reusing matrix information to achieve increased efficiency. 283*8c3ff71bSJunchao Zhang 284*8c3ff71bSJunchao Zhang Level: intermediate 285*8c3ff71bSJunchao Zhang 286*8c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS 287*8c3ff71bSJunchao Zhang @*/ 288*8c3ff71bSJunchao Zhang PetscErrorCode MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 289*8c3ff71bSJunchao Zhang { 290*8c3ff71bSJunchao Zhang PetscErrorCode ierr; 291*8c3ff71bSJunchao Zhang 292*8c3ff71bSJunchao Zhang PetscFunctionBegin; 293*8c3ff71bSJunchao Zhang ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 294*8c3ff71bSJunchao Zhang ierr = MatCreate(comm,A);CHKERRQ(ierr); 295*8c3ff71bSJunchao Zhang ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 296*8c3ff71bSJunchao Zhang ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 297*8c3ff71bSJunchao Zhang ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 298*8c3ff71bSJunchao Zhang PetscFunctionReturn(0); 299*8c3ff71bSJunchao Zhang } 300