xref: /petsc/src/mat/impls/aij/mpi/kokkos/mpiaijkok.kokkos.cxx (revision 3d0639e713175fd8812218a04e351b221c8b6709)
18c3ff71bSJunchao Zhang #include <petscconf.h>
28c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
38c3ff71bSJunchao Zhang 
48c3ff71bSJunchao Zhang PetscErrorCode MatAssemblyEnd_MPIAIJKokkos(Mat A,MatAssemblyType mode)
58c3ff71bSJunchao Zhang {
68c3ff71bSJunchao Zhang   PetscErrorCode ierr;
78c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
88c3ff71bSJunchao Zhang 
98c3ff71bSJunchao Zhang   PetscFunctionBegin;
108c3ff71bSJunchao Zhang   ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
118c3ff71bSJunchao Zhang   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
128c3ff71bSJunchao Zhang     ierr = VecSetType(mpiaij->lvec,VECSEQKOKKOS);CHKERRQ(ierr);
138c3ff71bSJunchao Zhang   }
148c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
158c3ff71bSJunchao Zhang }
168c3ff71bSJunchao Zhang 
178c3ff71bSJunchao Zhang PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJKokkos(Mat mat,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
188c3ff71bSJunchao Zhang {
198c3ff71bSJunchao Zhang   PetscErrorCode     ierr;
208c3ff71bSJunchao Zhang   PetscInt           i;
218c3ff71bSJunchao Zhang   Mat_MPIAIJ         *mpiaij = (Mat_MPIAIJ*)mat->data;
228c3ff71bSJunchao Zhang 
238c3ff71bSJunchao Zhang 
248c3ff71bSJunchao Zhang   PetscFunctionBegin;
258c3ff71bSJunchao Zhang   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
268c3ff71bSJunchao Zhang   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
278c3ff71bSJunchao Zhang   if (d_nnz) {
288c3ff71bSJunchao Zhang     for (i=0; i<mat->rmap->n; i++) {
298c3ff71bSJunchao Zhang       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
308c3ff71bSJunchao Zhang     }
318c3ff71bSJunchao Zhang   }
328c3ff71bSJunchao Zhang   if (o_nnz) {
338c3ff71bSJunchao Zhang     for (i=0; i<mat->rmap->n; i++) {
348c3ff71bSJunchao Zhang       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
358c3ff71bSJunchao Zhang     }
368c3ff71bSJunchao Zhang   }
378c3ff71bSJunchao Zhang   if (!mat->preallocated) {
388c3ff71bSJunchao Zhang     /* Explicitly create 2 MATSEQAIJKOKKOS matrices. */
398c3ff71bSJunchao Zhang     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr);
408c3ff71bSJunchao Zhang     ierr = MatSetSizes(mpiaij->A,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr);
418c3ff71bSJunchao Zhang     ierr = MatSetType(mpiaij->A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
428c3ff71bSJunchao Zhang     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->A);CHKERRQ(ierr);
438c3ff71bSJunchao Zhang     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr);
448c3ff71bSJunchao Zhang     ierr = MatSetSizes(mpiaij->B,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
458c3ff71bSJunchao Zhang     ierr = MatSetType(mpiaij->B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
468c3ff71bSJunchao Zhang     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->B);CHKERRQ(ierr);
478c3ff71bSJunchao Zhang   }
488c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr);
498c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr);
508c3ff71bSJunchao Zhang 
518c3ff71bSJunchao Zhang   mat->preallocated = PETSC_TRUE;
528c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
538c3ff71bSJunchao Zhang }
548c3ff71bSJunchao Zhang 
558c3ff71bSJunchao Zhang PetscErrorCode MatMult_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
568c3ff71bSJunchao Zhang {
578c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
588c3ff71bSJunchao Zhang   PetscErrorCode ierr;
598c3ff71bSJunchao Zhang   PetscInt       nt;
608c3ff71bSJunchao Zhang 
618c3ff71bSJunchao Zhang   PetscFunctionBegin;
628c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
638c3ff71bSJunchao Zhang   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
648c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
658c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr);
668c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
678c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr);
688c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
698c3ff71bSJunchao Zhang }
708c3ff71bSJunchao Zhang 
718c3ff71bSJunchao Zhang PetscErrorCode MatMultAdd_MPIAIJKokkos(Mat mat,Vec xx,Vec yy,Vec zz)
728c3ff71bSJunchao Zhang {
738c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
748c3ff71bSJunchao Zhang   PetscErrorCode ierr;
758c3ff71bSJunchao Zhang   PetscInt       nt;
768c3ff71bSJunchao Zhang 
778c3ff71bSJunchao Zhang   PetscFunctionBegin;
788c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
798c3ff71bSJunchao Zhang   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
808c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
818c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
828c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr);
848c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
858c3ff71bSJunchao Zhang }
868c3ff71bSJunchao Zhang 
878c3ff71bSJunchao Zhang PetscErrorCode MatMultTranspose_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
888c3ff71bSJunchao Zhang {
898c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
908c3ff71bSJunchao Zhang   PetscErrorCode ierr;
918c3ff71bSJunchao Zhang   PetscInt       nt;
928c3ff71bSJunchao Zhang 
938c3ff71bSJunchao Zhang   PetscFunctionBegin;
948c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958c3ff71bSJunchao Zhang   if (nt != mat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->rmap->n,nt);
968c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr);
978c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->multtranspose)(mpiaij->A,xx,yy);CHKERRQ(ierr);
988c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
998c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1018c3ff71bSJunchao Zhang }
1028c3ff71bSJunchao Zhang 
1038c3ff71bSJunchao Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
1048c3ff71bSJunchao Zhang {
1058c3ff71bSJunchao Zhang   PetscErrorCode     ierr;
1068c3ff71bSJunchao Zhang   Mat                B;
1078c3ff71bSJunchao Zhang 
1088c3ff71bSJunchao Zhang   PetscFunctionBegin;
1098c3ff71bSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) {
1108c3ff71bSJunchao Zhang     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1118c3ff71bSJunchao Zhang   } else if (reuse == MAT_REUSE_MATRIX) {
1128c3ff71bSJunchao Zhang     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1138c3ff71bSJunchao Zhang   }
1148c3ff71bSJunchao Zhang   B = *newmat;
1158c3ff71bSJunchao Zhang 
1168c3ff71bSJunchao Zhang   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1178c3ff71bSJunchao Zhang   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
118*3d0639e7SStefano Zampini   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJKOKKOS);CHKERRQ(ierr);
1198c3ff71bSJunchao Zhang 
1208c3ff71bSJunchao Zhang   B->ops->assemblyend    = MatAssemblyEnd_MPIAIJKokkos;
1218c3ff71bSJunchao Zhang   B->ops->mult           = MatMult_MPIAIJKokkos;
1228c3ff71bSJunchao Zhang   B->ops->multadd        = MatMultAdd_MPIAIJKokkos;
1238c3ff71bSJunchao Zhang   B->ops->multtranspose  = MatMultTranspose_MPIAIJKokkos;
1248c3ff71bSJunchao Zhang 
125*3d0639e7SStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJKokkos);CHKERRQ(ierr);
1268c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1278c3ff71bSJunchao Zhang }
1288c3ff71bSJunchao Zhang 
1298c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJKokkos(Mat A)
1308c3ff71bSJunchao Zhang {
1318c3ff71bSJunchao Zhang   PetscErrorCode ierr;
1328c3ff71bSJunchao Zhang 
1338c3ff71bSJunchao Zhang   PetscFunctionBegin;
1348c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
1358c3ff71bSJunchao Zhang   ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
1368c3ff71bSJunchao Zhang   ierr = MatConvert_MPIAIJ_MPIAIJKokkos(A,MATMPIAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1378c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
1388c3ff71bSJunchao Zhang }
1398c3ff71bSJunchao Zhang 
1408c3ff71bSJunchao Zhang /*@C
1418c3ff71bSJunchao Zhang    MatCreateAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
1428c3ff71bSJunchao Zhang    (the default parallel PETSc format).  This matrix will ultimately pushed down
1438c3ff71bSJunchao Zhang    to Kokkos for calculations. For good matrix
1448c3ff71bSJunchao Zhang    assembly performance the user should preallocate the matrix storage by setting
1458c3ff71bSJunchao Zhang    the parameter nz (or the array nnz).  By setting these parameters accurately,
1468c3ff71bSJunchao Zhang    performance during matrix assembly can be increased by more than a factor of 50.
1478c3ff71bSJunchao Zhang 
1488c3ff71bSJunchao Zhang    Collective
1498c3ff71bSJunchao Zhang 
1508c3ff71bSJunchao Zhang    Input Parameters:
1518c3ff71bSJunchao Zhang +  comm - MPI communicator, set to PETSC_COMM_SELF
1528c3ff71bSJunchao Zhang .  m - number of rows
1538c3ff71bSJunchao Zhang .  n - number of columns
1548c3ff71bSJunchao Zhang .  nz - number of nonzeros per row (same for all rows)
1558c3ff71bSJunchao Zhang -  nnz - array containing the number of nonzeros in the various rows
1568c3ff71bSJunchao Zhang          (possibly different for each row) or NULL
1578c3ff71bSJunchao Zhang 
1588c3ff71bSJunchao Zhang    Output Parameter:
1598c3ff71bSJunchao Zhang .  A - the matrix
1608c3ff71bSJunchao Zhang 
1618c3ff71bSJunchao Zhang    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1628c3ff71bSJunchao Zhang    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1638c3ff71bSJunchao Zhang    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1648c3ff71bSJunchao Zhang 
1658c3ff71bSJunchao Zhang    Notes:
1668c3ff71bSJunchao Zhang    If nnz is given then nz is ignored
1678c3ff71bSJunchao Zhang 
1688c3ff71bSJunchao Zhang    The AIJ format (also called the Yale sparse matrix format or
1698c3ff71bSJunchao Zhang    compressed row storage), is fully compatible with standard Fortran 77
1708c3ff71bSJunchao Zhang    storage.  That is, the stored row and column indices can begin at
1718c3ff71bSJunchao Zhang    either one (as in Fortran) or zero.  See the users' manual for details.
1728c3ff71bSJunchao Zhang 
1738c3ff71bSJunchao Zhang    Specify the preallocated storage with either nz or nnz (not both).
1748c3ff71bSJunchao Zhang    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
1758c3ff71bSJunchao Zhang    allocation.  For large problems you MUST preallocate memory or you
1768c3ff71bSJunchao Zhang    will get TERRIBLE performance, see the users' manual chapter on matrices.
1778c3ff71bSJunchao Zhang 
1788c3ff71bSJunchao Zhang    By default, this format uses inodes (identical nodes) when possible, to
1798c3ff71bSJunchao Zhang    improve numerical efficiency of matrix-vector products and solves. We
1808c3ff71bSJunchao Zhang    search for consecutive rows with the same nonzero structure, thereby
1818c3ff71bSJunchao Zhang    reusing matrix information to achieve increased efficiency.
1828c3ff71bSJunchao Zhang 
1838c3ff71bSJunchao Zhang    Level: intermediate
1848c3ff71bSJunchao Zhang 
1858c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJKOKKOS, MATAIJKokkos
1868c3ff71bSJunchao Zhang @*/
1878c3ff71bSJunchao Zhang PetscErrorCode  MatCreateAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1888c3ff71bSJunchao Zhang {
1898c3ff71bSJunchao Zhang   PetscErrorCode ierr;
1908c3ff71bSJunchao Zhang   PetscMPIInt    size;
1918c3ff71bSJunchao Zhang 
1928c3ff71bSJunchao Zhang   PetscFunctionBegin;
1938c3ff71bSJunchao Zhang   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1948c3ff71bSJunchao Zhang   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1958c3ff71bSJunchao Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1968c3ff71bSJunchao Zhang   if (size > 1) {
1978c3ff71bSJunchao Zhang     ierr = MatSetType(*A,MATMPIAIJKOKKOS);CHKERRQ(ierr);
1988c3ff71bSJunchao Zhang     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1998c3ff71bSJunchao Zhang   } else {
2008c3ff71bSJunchao Zhang     ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
2018c3ff71bSJunchao Zhang     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2028c3ff71bSJunchao Zhang   }
2038c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
2048c3ff71bSJunchao Zhang }
2058c3ff71bSJunchao Zhang 
206