xref: /petsc/src/mat/impls/aij/seq/kokkos/aijkok.kokkos.cxx (revision bb2d6e605a67d683a0328b551bfaeeadce8acd85)
18c3ff71bSJunchao Zhang #include "petsc/private/petscimpl.h"
28c3ff71bSJunchao Zhang #include <petscsystypes.h>
38c3ff71bSJunchao Zhang #include <petscerror.h>
49f114eb2SJunchao Zhang #include <petscveckokkos.hpp>
58c3ff71bSJunchao Zhang 
68c3ff71bSJunchao Zhang #include <Kokkos_Core.hpp>
78c3ff71bSJunchao Zhang #include <KokkosSparse_CrsMatrix.hpp>
88c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp>
98c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp>
108c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/aij.h>
118c3ff71bSJunchao Zhang 
128c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp>
138c3ff71bSJunchao Zhang 
148c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */
158c3ff71bSJunchao Zhang 
168c3ff71bSJunchao Zhang static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode)
178c3ff71bSJunchao Zhang {
188c3ff71bSJunchao Zhang   PetscErrorCode ierr;
198c3ff71bSJunchao Zhang 
208c3ff71bSJunchao Zhang   PetscFunctionBegin;
218c3ff71bSJunchao Zhang   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
228c3ff71bSJunchao Zhang   A->offloadmask = PETSC_OFFLOAD_CPU;
238c3ff71bSJunchao Zhang   /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */
248c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
258c3ff71bSJunchao Zhang }
268c3ff71bSJunchao Zhang 
278c3ff71bSJunchao Zhang static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A)
288c3ff71bSJunchao Zhang {
298c3ff71bSJunchao Zhang   Mat_SeqAIJ                *aijseq = (Mat_SeqAIJ*)A->data;
308c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos          *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
318c3ff71bSJunchao Zhang   PetscInt                  nrows   = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz;
328c3ff71bSJunchao Zhang 
338c3ff71bSJunchao Zhang   PetscFunctionBegin;
348c3ff71bSJunchao Zhang   /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */
358c3ff71bSJunchao Zhang   if (!aijkok || aijkok->nonzerostate != A->nonzerostate) {
368c3ff71bSJunchao Zhang     delete aijkok;
378c3ff71bSJunchao Zhang     aijkok               = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a);
388c3ff71bSJunchao Zhang     aijkok->nonzerostate = A->nonzerostate;
398c3ff71bSJunchao Zhang     A->spptr             = aijkok;
408c3ff71bSJunchao Zhang   } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */
418c3ff71bSJunchao Zhang     Kokkos::deep_copy(aijkok->a_d,aijkok->a_h);
428c3ff71bSJunchao Zhang   }
438c3ff71bSJunchao Zhang   A->offloadmask = PETSC_OFFLOAD_BOTH;
448c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
458c3ff71bSJunchao Zhang }
468c3ff71bSJunchao Zhang 
478c3ff71bSJunchao Zhang /* y = A x */
488c3ff71bSJunchao Zhang static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
498c3ff71bSJunchao Zhang {
508c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
518c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
528c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
538c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
548c3ff71bSJunchao Zhang 
558c3ff71bSJunchao Zhang   PetscFunctionBegin;
568c3ff71bSJunchao Zhang   ierr   = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
578c3ff71bSJunchao Zhang   ierr   = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
588c3ff71bSJunchao Zhang   ierr   = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
598c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
608c3ff71bSJunchao Zhang   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */
618c3ff71bSJunchao Zhang   ierr   = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
628c3ff71bSJunchao Zhang   ierr   = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
631e566348SJunchao Zhang   /* 2.0*aijkok->csr.nnz()-aijkok->csr.numRows() seems more accurate here but assumes there are no zero-rows. So a little sloopy here. */
64*bb2d6e60SJunchao Zhang   ierr   = WaitForKokkos();CHKERRQ(ierr);
651e566348SJunchao Zhang   ierr   = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
668c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
678c3ff71bSJunchao Zhang }
688c3ff71bSJunchao Zhang 
698c3ff71bSJunchao Zhang /* y = A^T x */
708c3ff71bSJunchao Zhang static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
718c3ff71bSJunchao Zhang {
728c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
738c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
748c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
758c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
768c3ff71bSJunchao Zhang 
778c3ff71bSJunchao Zhang   PetscFunctionBegin;
788c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
798c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
808c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
818c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
828c3ff71bSJunchao Zhang   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */
838c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
848c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
85*bb2d6e60SJunchao Zhang   ierr = WaitForKokkos();CHKERRQ(ierr);
861e566348SJunchao Zhang   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
878c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
888c3ff71bSJunchao Zhang }
898c3ff71bSJunchao Zhang 
908c3ff71bSJunchao Zhang /* y = A^H x */
918c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
928c3ff71bSJunchao Zhang {
938c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
948c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
958c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
968c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
978c3ff71bSJunchao Zhang 
988c3ff71bSJunchao Zhang   PetscFunctionBegin;
998c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
1008c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1018c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
1028c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
1038c3ff71bSJunchao Zhang   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */
1048c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1058c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
106*bb2d6e60SJunchao Zhang   ierr = WaitForKokkos();CHKERRQ(ierr);
1071e566348SJunchao Zhang   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
1088c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1098c3ff71bSJunchao Zhang }
1108c3ff71bSJunchao Zhang 
1118c3ff71bSJunchao Zhang /* z = A x + y */
1128c3ff71bSJunchao Zhang static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz)
1138c3ff71bSJunchao Zhang {
1148c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
1158c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
1168c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
1178c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
1188c3ff71bSJunchao Zhang 
1198c3ff71bSJunchao Zhang   PetscFunctionBegin;
1208c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
1218c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1228c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1238c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
1248c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
1258c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
1268c3ff71bSJunchao Zhang   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */
1278c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1288c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1298c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
130*bb2d6e60SJunchao Zhang   ierr = WaitForKokkos();CHKERRQ(ierr);
1311e566348SJunchao Zhang   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
1328c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1338c3ff71bSJunchao Zhang }
1348c3ff71bSJunchao Zhang 
1358c3ff71bSJunchao Zhang /* z = A^T x + y */
1368c3ff71bSJunchao Zhang static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
1378c3ff71bSJunchao Zhang {
1388c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
1398c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
1408c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
1418c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
1428c3ff71bSJunchao Zhang 
1438c3ff71bSJunchao Zhang   PetscFunctionBegin;
1448c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
1458c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1468c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1478c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
1488c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
1498c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
1508c3ff71bSJunchao Zhang   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */
1518c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1528c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1538c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
154*bb2d6e60SJunchao Zhang   ierr = WaitForKokkos();CHKERRQ(ierr);
1551e566348SJunchao Zhang   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
1568c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1578c3ff71bSJunchao Zhang }
1588c3ff71bSJunchao Zhang 
1598c3ff71bSJunchao Zhang /* z = A^H x + y */
1608c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
1618c3ff71bSJunchao Zhang {
1628c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
1638c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
1648c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
1658c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
1668c3ff71bSJunchao Zhang 
1678c3ff71bSJunchao Zhang   PetscFunctionBegin;
1688c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
1698c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1708c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1718c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
1728c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
1738c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
1748c3ff71bSJunchao Zhang   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */
1758c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
1768c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
1778c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
178*bb2d6e60SJunchao Zhang   ierr = WaitForKokkos();CHKERRQ(ierr);
1791e566348SJunchao Zhang   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
1808c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1818c3ff71bSJunchao Zhang }
1828c3ff71bSJunchao Zhang 
1833d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
1848c3ff71bSJunchao Zhang {
1858c3ff71bSJunchao Zhang   PetscErrorCode   ierr;
1868c3ff71bSJunchao Zhang   Mat              B;
187c58ef05eSStefano Zampini   Mat_SeqAIJ       *aij;
1888c3ff71bSJunchao Zhang 
1898c3ff71bSJunchao Zhang   PetscFunctionBegin;
1908c3ff71bSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */
1918c3ff71bSJunchao Zhang     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1928c3ff71bSJunchao Zhang   } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */
1938c3ff71bSJunchao Zhang     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1948c3ff71bSJunchao Zhang   }
1958c3ff71bSJunchao Zhang 
1968c3ff71bSJunchao Zhang   B    = *newmat;
1978c3ff71bSJunchao Zhang   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1988c3ff71bSJunchao Zhang   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
1998c3ff71bSJunchao Zhang   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
2000d8bce8aSStefano Zampini   ierr = MatSetOps_SeqAIJKokkos(B);CHKERRQ(ierr);
201c58ef05eSStefano Zampini   /* TODO: see ViennaCL and CUSPARSE once we have a BindToCPU? */
202c58ef05eSStefano Zampini   aij  = (Mat_SeqAIJ*)B->data;
203c58ef05eSStefano Zampini   aij->inode.use = PETSC_FALSE;
2048c3ff71bSJunchao Zhang 
2058c3ff71bSJunchao Zhang   B->offloadmask = PETSC_OFFLOAD_CPU;
2068c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2078c3ff71bSJunchao Zhang }
2088c3ff71bSJunchao Zhang 
2098c3ff71bSJunchao Zhang static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B)
2108c3ff71bSJunchao Zhang {
2118c3ff71bSJunchao Zhang   PetscErrorCode ierr;
2128c3ff71bSJunchao Zhang 
2138c3ff71bSJunchao Zhang   PetscFunctionBegin;
2148c3ff71bSJunchao Zhang   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
2158c3ff71bSJunchao Zhang   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr);
2168c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2178c3ff71bSJunchao Zhang }
2188c3ff71bSJunchao Zhang 
2198c3ff71bSJunchao Zhang static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A)
2208c3ff71bSJunchao Zhang {
2218c3ff71bSJunchao Zhang   PetscErrorCode        ierr;
2228c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos      *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
2238c3ff71bSJunchao Zhang 
2248c3ff71bSJunchao Zhang   PetscFunctionBegin;
2258c3ff71bSJunchao Zhang   delete aijkok;
2268c3ff71bSJunchao Zhang   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
2278c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2288c3ff71bSJunchao Zhang }
2298c3ff71bSJunchao Zhang 
2308c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A)
2318c3ff71bSJunchao Zhang {
2328c3ff71bSJunchao Zhang   PetscErrorCode ierr;
2338c3ff71bSJunchao Zhang 
2348c3ff71bSJunchao Zhang   PetscFunctionBegin;
2358c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
2368c3ff71bSJunchao Zhang   ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr);
2378c3ff71bSJunchao Zhang   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
2388c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2398c3ff71bSJunchao Zhang }
2408c3ff71bSJunchao Zhang 
2418c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A)
2428c3ff71bSJunchao Zhang {
2438c3ff71bSJunchao Zhang   PetscFunctionBegin;
2448c3ff71bSJunchao Zhang   A->ops->assemblyend               = MatAssemblyEnd_SeqAIJKokkos;
2458c3ff71bSJunchao Zhang   A->ops->destroy                   = MatDestroy_SeqAIJKokkos;
2468c3ff71bSJunchao Zhang   A->ops->duplicate                 = MatDuplicate_SeqAIJKokkos;
2478c3ff71bSJunchao Zhang 
2488c3ff71bSJunchao Zhang   A->ops->mult                      = MatMult_SeqAIJKokkos;
2498c3ff71bSJunchao Zhang   A->ops->multadd                   = MatMultAdd_SeqAIJKokkos;
2508c3ff71bSJunchao Zhang   A->ops->multtranspose             = MatMultTranspose_SeqAIJKokkos;
2518c3ff71bSJunchao Zhang   A->ops->multtransposeadd          = MatMultTransposeAdd_SeqAIJKokkos;
2528c3ff71bSJunchao Zhang   A->ops->multhermitiantranspose    = MatMultHermitianTranspose_SeqAIJKokkos;
2538c3ff71bSJunchao Zhang   A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos;
2548c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2558c3ff71bSJunchao Zhang }
2568c3ff71bSJunchao Zhang 
2578c3ff71bSJunchao Zhang /* --------------------------------------------------------------------------------*/
2588c3ff71bSJunchao Zhang /*C
2598c3ff71bSJunchao Zhang    MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
2608c3ff71bSJunchao Zhang    (the default parallel PETSc format). This matrix will ultimately be handled by
2618c3ff71bSJunchao Zhang    Kokkos for calculations. For good matrix
2628c3ff71bSJunchao Zhang    assembly performance the user should preallocate the matrix storage by setting
2638c3ff71bSJunchao Zhang    the parameter nz (or the array nnz).  By setting these parameters accurately,
2648c3ff71bSJunchao Zhang    performance during matrix assembly can be increased by more than a factor of 50.
2658c3ff71bSJunchao Zhang 
2668c3ff71bSJunchao Zhang    Collective
2678c3ff71bSJunchao Zhang 
2688c3ff71bSJunchao Zhang    Input Parameters:
2698c3ff71bSJunchao Zhang +  comm - MPI communicator, set to PETSC_COMM_SELF
2708c3ff71bSJunchao Zhang .  m - number of rows
2718c3ff71bSJunchao Zhang .  n - number of columns
2728c3ff71bSJunchao Zhang .  nz - number of nonzeros per row (same for all rows)
2738c3ff71bSJunchao Zhang -  nnz - array containing the number of nonzeros in the various rows
2748c3ff71bSJunchao Zhang          (possibly different for each row) or NULL
2758c3ff71bSJunchao Zhang 
2768c3ff71bSJunchao Zhang    Output Parameter:
2778c3ff71bSJunchao Zhang .  A - the matrix
2788c3ff71bSJunchao Zhang 
2798c3ff71bSJunchao Zhang    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2808c3ff71bSJunchao Zhang    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2818c3ff71bSJunchao Zhang    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2828c3ff71bSJunchao Zhang 
2838c3ff71bSJunchao Zhang    Notes:
2848c3ff71bSJunchao Zhang    If nnz is given then nz is ignored
2858c3ff71bSJunchao Zhang 
2868c3ff71bSJunchao Zhang    The AIJ format (also called the Yale sparse matrix format or
2878c3ff71bSJunchao Zhang    compressed row storage), is fully compatible with standard Fortran 77
2888c3ff71bSJunchao Zhang    storage.  That is, the stored row and column indices can begin at
2898c3ff71bSJunchao Zhang    either one (as in Fortran) or zero.  See the users' manual for details.
2908c3ff71bSJunchao Zhang 
2918c3ff71bSJunchao Zhang    Specify the preallocated storage with either nz or nnz (not both).
2928c3ff71bSJunchao Zhang    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
2938c3ff71bSJunchao Zhang    allocation.  For large problems you MUST preallocate memory or you
2948c3ff71bSJunchao Zhang    will get TERRIBLE performance, see the users' manual chapter on matrices.
2958c3ff71bSJunchao Zhang 
2968c3ff71bSJunchao Zhang    By default, this format uses inodes (identical nodes) when possible, to
2978c3ff71bSJunchao Zhang    improve numerical efficiency of matrix-vector products and solves. We
2988c3ff71bSJunchao Zhang    search for consecutive rows with the same nonzero structure, thereby
2998c3ff71bSJunchao Zhang    reusing matrix information to achieve increased efficiency.
3008c3ff71bSJunchao Zhang 
3018c3ff71bSJunchao Zhang    Level: intermediate
3028c3ff71bSJunchao Zhang 
3038c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS
3048c3ff71bSJunchao Zhang @*/
3058c3ff71bSJunchao Zhang PetscErrorCode  MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3068c3ff71bSJunchao Zhang {
3078c3ff71bSJunchao Zhang   PetscErrorCode ierr;
3088c3ff71bSJunchao Zhang 
3098c3ff71bSJunchao Zhang   PetscFunctionBegin;
3108c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
3118c3ff71bSJunchao Zhang   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3128c3ff71bSJunchao Zhang   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3138c3ff71bSJunchao Zhang   ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
3148c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3158c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
3168c3ff71bSJunchao Zhang }
317