18c3ff71bSJunchao Zhang #include "petsc/private/petscimpl.h" 28c3ff71bSJunchao Zhang #include <petscsystypes.h> 38c3ff71bSJunchao Zhang #include <petscerror.h> 49f114eb2SJunchao Zhang #include <petscveckokkos.hpp> 58c3ff71bSJunchao Zhang 68c3ff71bSJunchao Zhang #include <Kokkos_Core.hpp> 78c3ff71bSJunchao Zhang #include <KokkosSparse_CrsMatrix.hpp> 88c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp> 98c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp> 108c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/aij.h> 118c3ff71bSJunchao Zhang 128c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp> 138c3ff71bSJunchao Zhang 148c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */ 158c3ff71bSJunchao Zhang 168c3ff71bSJunchao Zhang static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode) 178c3ff71bSJunchao Zhang { 188c3ff71bSJunchao Zhang PetscErrorCode ierr; 198c3ff71bSJunchao Zhang 208c3ff71bSJunchao Zhang PetscFunctionBegin; 218c3ff71bSJunchao Zhang ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 228c3ff71bSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_CPU; 238c3ff71bSJunchao Zhang /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */ 248c3ff71bSJunchao Zhang PetscFunctionReturn(0); 258c3ff71bSJunchao Zhang } 268c3ff71bSJunchao Zhang 278c3ff71bSJunchao Zhang static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A) 288c3ff71bSJunchao Zhang { 298c3ff71bSJunchao Zhang Mat_SeqAIJ *aijseq = (Mat_SeqAIJ*)A->data; 308c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 318c3ff71bSJunchao Zhang PetscInt nrows = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz; 328c3ff71bSJunchao Zhang 338c3ff71bSJunchao Zhang PetscFunctionBegin; 348c3ff71bSJunchao Zhang /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */ 358c3ff71bSJunchao Zhang if (!aijkok || aijkok->nonzerostate != A->nonzerostate) { 368c3ff71bSJunchao Zhang delete aijkok; 378c3ff71bSJunchao Zhang aijkok = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a); 388c3ff71bSJunchao Zhang aijkok->nonzerostate = A->nonzerostate; 398c3ff71bSJunchao Zhang A->spptr = aijkok; 408c3ff71bSJunchao Zhang } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */ 418c3ff71bSJunchao Zhang Kokkos::deep_copy(aijkok->a_d,aijkok->a_h); 428c3ff71bSJunchao Zhang } 438c3ff71bSJunchao Zhang A->offloadmask = PETSC_OFFLOAD_BOTH; 448c3ff71bSJunchao Zhang PetscFunctionReturn(0); 458c3ff71bSJunchao Zhang } 468c3ff71bSJunchao Zhang 478c3ff71bSJunchao Zhang /* y = A x */ 488c3ff71bSJunchao Zhang static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 498c3ff71bSJunchao Zhang { 508c3ff71bSJunchao Zhang PetscErrorCode ierr; 518c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 528c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 538c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 548c3ff71bSJunchao Zhang 558c3ff71bSJunchao Zhang PetscFunctionBegin; 568c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 578c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 588c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 598c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 608c3ff71bSJunchao Zhang KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */ 618c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 628c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 63*1e566348SJunchao Zhang /* 2.0*aijkok->csr.nnz()-aijkok->csr.numRows() seems more accurate here but assumes there are no zero-rows. So a little sloopy here. */ 64*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 658c3ff71bSJunchao Zhang PetscFunctionReturn(0); 668c3ff71bSJunchao Zhang } 678c3ff71bSJunchao Zhang 688c3ff71bSJunchao Zhang /* y = A^T x */ 698c3ff71bSJunchao Zhang static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 708c3ff71bSJunchao Zhang { 718c3ff71bSJunchao Zhang PetscErrorCode ierr; 728c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 738c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 748c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 758c3ff71bSJunchao Zhang 768c3ff71bSJunchao Zhang PetscFunctionBegin; 778c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 788c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 798c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 808c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 818c3ff71bSJunchao Zhang KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */ 828c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 838c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 84*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 858c3ff71bSJunchao Zhang PetscFunctionReturn(0); 868c3ff71bSJunchao Zhang } 878c3ff71bSJunchao Zhang 888c3ff71bSJunchao Zhang /* y = A^H x */ 898c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 908c3ff71bSJunchao Zhang { 918c3ff71bSJunchao Zhang PetscErrorCode ierr; 928c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 938c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv; 948c3ff71bSJunchao Zhang PetscScalarViewDevice_t yv; 958c3ff71bSJunchao Zhang 968c3ff71bSJunchao Zhang PetscFunctionBegin; 978c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 988c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 998c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 1008c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 1018c3ff71bSJunchao Zhang KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */ 1028c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1038c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 104*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 1058c3ff71bSJunchao Zhang PetscFunctionReturn(0); 1068c3ff71bSJunchao Zhang } 1078c3ff71bSJunchao Zhang 1088c3ff71bSJunchao Zhang /* z = A x + y */ 1098c3ff71bSJunchao Zhang static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz) 1108c3ff71bSJunchao Zhang { 1118c3ff71bSJunchao Zhang PetscErrorCode ierr; 1128c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 1138c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 1148c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 1158c3ff71bSJunchao Zhang 1168c3ff71bSJunchao Zhang PetscFunctionBegin; 1178c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 1188c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1198c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1208c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 1218c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 1228c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 1238c3ff71bSJunchao Zhang KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */ 1248c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1258c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1268c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 127*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 1288c3ff71bSJunchao Zhang PetscFunctionReturn(0); 1298c3ff71bSJunchao Zhang } 1308c3ff71bSJunchao Zhang 1318c3ff71bSJunchao Zhang /* z = A^T x + y */ 1328c3ff71bSJunchao Zhang static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 1338c3ff71bSJunchao Zhang { 1348c3ff71bSJunchao Zhang PetscErrorCode ierr; 1358c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 1368c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 1378c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 1388c3ff71bSJunchao Zhang 1398c3ff71bSJunchao Zhang PetscFunctionBegin; 1408c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 1418c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1428c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1438c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 1448c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 1458c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 1468c3ff71bSJunchao Zhang KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */ 1478c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1488c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1498c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 150*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 1518c3ff71bSJunchao Zhang PetscFunctionReturn(0); 1528c3ff71bSJunchao Zhang } 1538c3ff71bSJunchao Zhang 1548c3ff71bSJunchao Zhang /* z = A^H x + y */ 1558c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 1568c3ff71bSJunchao Zhang { 1578c3ff71bSJunchao Zhang PetscErrorCode ierr; 1588c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok; 1598c3ff71bSJunchao Zhang ConstPetscScalarViewDevice_t xv,yv; 1608c3ff71bSJunchao Zhang PetscScalarViewDevice_t zv; 1618c3ff71bSJunchao Zhang 1628c3ff71bSJunchao Zhang PetscFunctionBegin; 1638c3ff71bSJunchao Zhang ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 1648c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1658c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1668c3ff71bSJunchao Zhang ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 1678c3ff71bSJunchao Zhang if (zz != yy) Kokkos::deep_copy(zv,yv); 1688c3ff71bSJunchao Zhang aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 1698c3ff71bSJunchao Zhang KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */ 1708c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 1718c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 1728c3ff71bSJunchao Zhang ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 173*1e566348SJunchao Zhang ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 1748c3ff71bSJunchao Zhang PetscFunctionReturn(0); 1758c3ff71bSJunchao Zhang } 1768c3ff71bSJunchao Zhang 1773d0639e7SStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat) 1788c3ff71bSJunchao Zhang { 1798c3ff71bSJunchao Zhang PetscErrorCode ierr; 1808c3ff71bSJunchao Zhang Mat B; 181c58ef05eSStefano Zampini Mat_SeqAIJ *aij; 1828c3ff71bSJunchao Zhang 1838c3ff71bSJunchao Zhang PetscFunctionBegin; 1848c3ff71bSJunchao Zhang if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */ 1858c3ff71bSJunchao Zhang ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1868c3ff71bSJunchao Zhang } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */ 1878c3ff71bSJunchao Zhang ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1888c3ff71bSJunchao Zhang } 1898c3ff71bSJunchao Zhang 1908c3ff71bSJunchao Zhang B = *newmat; 1918c3ff71bSJunchao Zhang ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1928c3ff71bSJunchao Zhang ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr); 1938c3ff71bSJunchao Zhang ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr); 1940d8bce8aSStefano Zampini ierr = MatSetOps_SeqAIJKokkos(B);CHKERRQ(ierr); 195c58ef05eSStefano Zampini /* TODO: see ViennaCL and CUSPARSE once we have a BindToCPU? */ 196c58ef05eSStefano Zampini aij = (Mat_SeqAIJ*)B->data; 197c58ef05eSStefano Zampini aij->inode.use = PETSC_FALSE; 1988c3ff71bSJunchao Zhang 1998c3ff71bSJunchao Zhang B->offloadmask = PETSC_OFFLOAD_CPU; 2008c3ff71bSJunchao Zhang PetscFunctionReturn(0); 2018c3ff71bSJunchao Zhang } 2028c3ff71bSJunchao Zhang 2038c3ff71bSJunchao Zhang static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B) 2048c3ff71bSJunchao Zhang { 2058c3ff71bSJunchao Zhang PetscErrorCode ierr; 2068c3ff71bSJunchao Zhang 2078c3ff71bSJunchao Zhang PetscFunctionBegin; 2088c3ff71bSJunchao Zhang ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 2098c3ff71bSJunchao Zhang ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr); 2108c3ff71bSJunchao Zhang PetscFunctionReturn(0); 2118c3ff71bSJunchao Zhang } 2128c3ff71bSJunchao Zhang 2138c3ff71bSJunchao Zhang static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A) 2148c3ff71bSJunchao Zhang { 2158c3ff71bSJunchao Zhang PetscErrorCode ierr; 2168c3ff71bSJunchao Zhang Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 2178c3ff71bSJunchao Zhang 2188c3ff71bSJunchao Zhang PetscFunctionBegin; 2198c3ff71bSJunchao Zhang delete aijkok; 2208c3ff71bSJunchao Zhang ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 2218c3ff71bSJunchao Zhang PetscFunctionReturn(0); 2228c3ff71bSJunchao Zhang } 2238c3ff71bSJunchao Zhang 2248c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A) 2258c3ff71bSJunchao Zhang { 2268c3ff71bSJunchao Zhang PetscErrorCode ierr; 2278c3ff71bSJunchao Zhang 2288c3ff71bSJunchao Zhang PetscFunctionBegin; 2298c3ff71bSJunchao Zhang ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 2308c3ff71bSJunchao Zhang ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr); 2318c3ff71bSJunchao Zhang ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 2328c3ff71bSJunchao Zhang PetscFunctionReturn(0); 2338c3ff71bSJunchao Zhang } 2348c3ff71bSJunchao Zhang 2358c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A) 2368c3ff71bSJunchao Zhang { 2378c3ff71bSJunchao Zhang PetscFunctionBegin; 2388c3ff71bSJunchao Zhang A->ops->assemblyend = MatAssemblyEnd_SeqAIJKokkos; 2398c3ff71bSJunchao Zhang A->ops->destroy = MatDestroy_SeqAIJKokkos; 2408c3ff71bSJunchao Zhang A->ops->duplicate = MatDuplicate_SeqAIJKokkos; 2418c3ff71bSJunchao Zhang 2428c3ff71bSJunchao Zhang A->ops->mult = MatMult_SeqAIJKokkos; 2438c3ff71bSJunchao Zhang A->ops->multadd = MatMultAdd_SeqAIJKokkos; 2448c3ff71bSJunchao Zhang A->ops->multtranspose = MatMultTranspose_SeqAIJKokkos; 2458c3ff71bSJunchao Zhang A->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJKokkos; 2468c3ff71bSJunchao Zhang A->ops->multhermitiantranspose = MatMultHermitianTranspose_SeqAIJKokkos; 2478c3ff71bSJunchao Zhang A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos; 2488c3ff71bSJunchao Zhang PetscFunctionReturn(0); 2498c3ff71bSJunchao Zhang } 2508c3ff71bSJunchao Zhang 2518c3ff71bSJunchao Zhang /* --------------------------------------------------------------------------------*/ 2528c3ff71bSJunchao Zhang /*C 2538c3ff71bSJunchao Zhang MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format 2548c3ff71bSJunchao Zhang (the default parallel PETSc format). This matrix will ultimately be handled by 2558c3ff71bSJunchao Zhang Kokkos for calculations. For good matrix 2568c3ff71bSJunchao Zhang assembly performance the user should preallocate the matrix storage by setting 2578c3ff71bSJunchao Zhang the parameter nz (or the array nnz). By setting these parameters accurately, 2588c3ff71bSJunchao Zhang performance during matrix assembly can be increased by more than a factor of 50. 2598c3ff71bSJunchao Zhang 2608c3ff71bSJunchao Zhang Collective 2618c3ff71bSJunchao Zhang 2628c3ff71bSJunchao Zhang Input Parameters: 2638c3ff71bSJunchao Zhang + comm - MPI communicator, set to PETSC_COMM_SELF 2648c3ff71bSJunchao Zhang . m - number of rows 2658c3ff71bSJunchao Zhang . n - number of columns 2668c3ff71bSJunchao Zhang . nz - number of nonzeros per row (same for all rows) 2678c3ff71bSJunchao Zhang - nnz - array containing the number of nonzeros in the various rows 2688c3ff71bSJunchao Zhang (possibly different for each row) or NULL 2698c3ff71bSJunchao Zhang 2708c3ff71bSJunchao Zhang Output Parameter: 2718c3ff71bSJunchao Zhang . A - the matrix 2728c3ff71bSJunchao Zhang 2738c3ff71bSJunchao Zhang It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2748c3ff71bSJunchao Zhang MatXXXXSetPreallocation() paradgm instead of this routine directly. 2758c3ff71bSJunchao Zhang [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2768c3ff71bSJunchao Zhang 2778c3ff71bSJunchao Zhang Notes: 2788c3ff71bSJunchao Zhang If nnz is given then nz is ignored 2798c3ff71bSJunchao Zhang 2808c3ff71bSJunchao Zhang The AIJ format (also called the Yale sparse matrix format or 2818c3ff71bSJunchao Zhang compressed row storage), is fully compatible with standard Fortran 77 2828c3ff71bSJunchao Zhang storage. That is, the stored row and column indices can begin at 2838c3ff71bSJunchao Zhang either one (as in Fortran) or zero. See the users' manual for details. 2848c3ff71bSJunchao Zhang 2858c3ff71bSJunchao Zhang Specify the preallocated storage with either nz or nnz (not both). 2868c3ff71bSJunchao Zhang Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 2878c3ff71bSJunchao Zhang allocation. For large problems you MUST preallocate memory or you 2888c3ff71bSJunchao Zhang will get TERRIBLE performance, see the users' manual chapter on matrices. 2898c3ff71bSJunchao Zhang 2908c3ff71bSJunchao Zhang By default, this format uses inodes (identical nodes) when possible, to 2918c3ff71bSJunchao Zhang improve numerical efficiency of matrix-vector products and solves. We 2928c3ff71bSJunchao Zhang search for consecutive rows with the same nonzero structure, thereby 2938c3ff71bSJunchao Zhang reusing matrix information to achieve increased efficiency. 2948c3ff71bSJunchao Zhang 2958c3ff71bSJunchao Zhang Level: intermediate 2968c3ff71bSJunchao Zhang 2978c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS 2988c3ff71bSJunchao Zhang @*/ 2998c3ff71bSJunchao Zhang PetscErrorCode MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3008c3ff71bSJunchao Zhang { 3018c3ff71bSJunchao Zhang PetscErrorCode ierr; 3028c3ff71bSJunchao Zhang 3038c3ff71bSJunchao Zhang PetscFunctionBegin; 3048c3ff71bSJunchao Zhang ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 3058c3ff71bSJunchao Zhang ierr = MatCreate(comm,A);CHKERRQ(ierr); 3068c3ff71bSJunchao Zhang ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3078c3ff71bSJunchao Zhang ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 3088c3ff71bSJunchao Zhang ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3098c3ff71bSJunchao Zhang PetscFunctionReturn(0); 3108c3ff71bSJunchao Zhang } 311