xref: /petsc/src/mat/impls/aij/mpi/kokkos/mpiaijkok.kokkos.cxx (revision 8c3ff71bb91e1c43c25fcf6109e2039cde4e7447)
1*8c3ff71bSJunchao Zhang #include <petscconf.h>
2*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h>
3*8c3ff71bSJunchao Zhang 
4*8c3ff71bSJunchao Zhang PetscErrorCode MatAssemblyEnd_MPIAIJKokkos(Mat A,MatAssemblyType mode)
5*8c3ff71bSJunchao Zhang {
6*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
7*8c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
8*8c3ff71bSJunchao Zhang 
9*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
10*8c3ff71bSJunchao Zhang   ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
11*8c3ff71bSJunchao Zhang   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
12*8c3ff71bSJunchao Zhang     ierr = VecSetType(mpiaij->lvec,VECSEQKOKKOS);CHKERRQ(ierr);
13*8c3ff71bSJunchao Zhang   }
14*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
15*8c3ff71bSJunchao Zhang }
16*8c3ff71bSJunchao Zhang 
17*8c3ff71bSJunchao Zhang PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJKokkos(Mat mat,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
18*8c3ff71bSJunchao Zhang {
19*8c3ff71bSJunchao Zhang   PetscErrorCode     ierr;
20*8c3ff71bSJunchao Zhang   PetscInt           i;
21*8c3ff71bSJunchao Zhang   Mat_MPIAIJ         *mpiaij = (Mat_MPIAIJ*)mat->data;
22*8c3ff71bSJunchao Zhang 
23*8c3ff71bSJunchao Zhang 
24*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
25*8c3ff71bSJunchao Zhang   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
26*8c3ff71bSJunchao Zhang   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
27*8c3ff71bSJunchao Zhang   if (d_nnz) {
28*8c3ff71bSJunchao Zhang     for (i=0; i<mat->rmap->n; i++) {
29*8c3ff71bSJunchao Zhang       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
30*8c3ff71bSJunchao Zhang     }
31*8c3ff71bSJunchao Zhang   }
32*8c3ff71bSJunchao Zhang   if (o_nnz) {
33*8c3ff71bSJunchao Zhang     for (i=0; i<mat->rmap->n; i++) {
34*8c3ff71bSJunchao Zhang       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
35*8c3ff71bSJunchao Zhang     }
36*8c3ff71bSJunchao Zhang   }
37*8c3ff71bSJunchao Zhang   if (!mat->preallocated) {
38*8c3ff71bSJunchao Zhang     /* Explicitly create 2 MATSEQAIJKOKKOS matrices. */
39*8c3ff71bSJunchao Zhang     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr);
40*8c3ff71bSJunchao Zhang     ierr = MatSetSizes(mpiaij->A,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr);
41*8c3ff71bSJunchao Zhang     ierr = MatSetType(mpiaij->A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
42*8c3ff71bSJunchao Zhang     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->A);CHKERRQ(ierr);
43*8c3ff71bSJunchao Zhang     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr);
44*8c3ff71bSJunchao Zhang     ierr = MatSetSizes(mpiaij->B,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
45*8c3ff71bSJunchao Zhang     ierr = MatSetType(mpiaij->B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
46*8c3ff71bSJunchao Zhang     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->B);CHKERRQ(ierr);
47*8c3ff71bSJunchao Zhang   }
48*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr);
49*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr);
50*8c3ff71bSJunchao Zhang 
51*8c3ff71bSJunchao Zhang   mat->preallocated = PETSC_TRUE;
52*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
53*8c3ff71bSJunchao Zhang }
54*8c3ff71bSJunchao Zhang 
55*8c3ff71bSJunchao Zhang PetscErrorCode MatMult_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
56*8c3ff71bSJunchao Zhang {
57*8c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
58*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
59*8c3ff71bSJunchao Zhang   PetscInt       nt;
60*8c3ff71bSJunchao Zhang 
61*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
62*8c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
63*8c3ff71bSJunchao Zhang   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
64*8c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr);
66*8c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
67*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr);
68*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
69*8c3ff71bSJunchao Zhang }
70*8c3ff71bSJunchao Zhang 
71*8c3ff71bSJunchao Zhang PetscErrorCode MatMultAdd_MPIAIJKokkos(Mat mat,Vec xx,Vec yy,Vec zz)
72*8c3ff71bSJunchao Zhang {
73*8c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
74*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
75*8c3ff71bSJunchao Zhang   PetscInt       nt;
76*8c3ff71bSJunchao Zhang 
77*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
78*8c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
79*8c3ff71bSJunchao Zhang   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
80*8c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
82*8c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
83*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr);
84*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
85*8c3ff71bSJunchao Zhang }
86*8c3ff71bSJunchao Zhang 
87*8c3ff71bSJunchao Zhang PetscErrorCode MatMultTranspose_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
88*8c3ff71bSJunchao Zhang {
89*8c3ff71bSJunchao Zhang   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
90*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
91*8c3ff71bSJunchao Zhang   PetscInt       nt;
92*8c3ff71bSJunchao Zhang 
93*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
94*8c3ff71bSJunchao Zhang   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
95*8c3ff71bSJunchao Zhang   if (nt != mat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->rmap->n,nt);
96*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr);
97*8c3ff71bSJunchao Zhang   ierr = (*mpiaij->A->ops->multtranspose)(mpiaij->A,xx,yy);CHKERRQ(ierr);
98*8c3ff71bSJunchao Zhang   ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
99*8c3ff71bSJunchao Zhang   ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
100*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
101*8c3ff71bSJunchao Zhang }
102*8c3ff71bSJunchao Zhang 
103*8c3ff71bSJunchao Zhang PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
104*8c3ff71bSJunchao Zhang {
105*8c3ff71bSJunchao Zhang   PetscErrorCode     ierr;
106*8c3ff71bSJunchao Zhang   Mat                B;
107*8c3ff71bSJunchao Zhang 
108*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
109*8c3ff71bSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) {
110*8c3ff71bSJunchao Zhang     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
111*8c3ff71bSJunchao Zhang   } else if (reuse == MAT_REUSE_MATRIX) {
112*8c3ff71bSJunchao Zhang     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
113*8c3ff71bSJunchao Zhang   }
114*8c3ff71bSJunchao Zhang   B = *newmat;
115*8c3ff71bSJunchao Zhang 
116*8c3ff71bSJunchao Zhang   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
117*8c3ff71bSJunchao Zhang   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
118*8c3ff71bSJunchao Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJKOKKOS);CHKERRQ(ierr);
119*8c3ff71bSJunchao Zhang 
120*8c3ff71bSJunchao Zhang   B->ops->assemblyend    = MatAssemblyEnd_MPIAIJKokkos;
121*8c3ff71bSJunchao Zhang   B->ops->mult           = MatMult_MPIAIJKokkos;
122*8c3ff71bSJunchao Zhang   B->ops->multadd        = MatMultAdd_MPIAIJKokkos;
123*8c3ff71bSJunchao Zhang   B->ops->multtranspose  = MatMultTranspose_MPIAIJKokkos;
124*8c3ff71bSJunchao Zhang 
125*8c3ff71bSJunchao Zhang   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJKokkos);CHKERRQ(ierr);
126*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
127*8c3ff71bSJunchao Zhang }
128*8c3ff71bSJunchao Zhang 
129*8c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJKokkos(Mat A)
130*8c3ff71bSJunchao Zhang {
131*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
132*8c3ff71bSJunchao Zhang 
133*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
134*8c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
135*8c3ff71bSJunchao Zhang   ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
136*8c3ff71bSJunchao Zhang   ierr = MatConvert_MPIAIJ_MPIAIJKokkos(A,MATMPIAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
137*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
138*8c3ff71bSJunchao Zhang }
139*8c3ff71bSJunchao Zhang 
140*8c3ff71bSJunchao Zhang /*@C
141*8c3ff71bSJunchao Zhang    MatCreateAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
142*8c3ff71bSJunchao Zhang    (the default parallel PETSc format).  This matrix will ultimately pushed down
143*8c3ff71bSJunchao Zhang    to Kokkos for calculations. For good matrix
144*8c3ff71bSJunchao Zhang    assembly performance the user should preallocate the matrix storage by setting
145*8c3ff71bSJunchao Zhang    the parameter nz (or the array nnz).  By setting these parameters accurately,
146*8c3ff71bSJunchao Zhang    performance during matrix assembly can be increased by more than a factor of 50.
147*8c3ff71bSJunchao Zhang 
148*8c3ff71bSJunchao Zhang    Collective
149*8c3ff71bSJunchao Zhang 
150*8c3ff71bSJunchao Zhang    Input Parameters:
151*8c3ff71bSJunchao Zhang +  comm - MPI communicator, set to PETSC_COMM_SELF
152*8c3ff71bSJunchao Zhang .  m - number of rows
153*8c3ff71bSJunchao Zhang .  n - number of columns
154*8c3ff71bSJunchao Zhang .  nz - number of nonzeros per row (same for all rows)
155*8c3ff71bSJunchao Zhang -  nnz - array containing the number of nonzeros in the various rows
156*8c3ff71bSJunchao Zhang          (possibly different for each row) or NULL
157*8c3ff71bSJunchao Zhang 
158*8c3ff71bSJunchao Zhang    Output Parameter:
159*8c3ff71bSJunchao Zhang .  A - the matrix
160*8c3ff71bSJunchao Zhang 
161*8c3ff71bSJunchao Zhang    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
162*8c3ff71bSJunchao Zhang    MatXXXXSetPreallocation() paradigm instead of this routine directly.
163*8c3ff71bSJunchao Zhang    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
164*8c3ff71bSJunchao Zhang 
165*8c3ff71bSJunchao Zhang    Notes:
166*8c3ff71bSJunchao Zhang    If nnz is given then nz is ignored
167*8c3ff71bSJunchao Zhang 
168*8c3ff71bSJunchao Zhang    The AIJ format (also called the Yale sparse matrix format or
169*8c3ff71bSJunchao Zhang    compressed row storage), is fully compatible with standard Fortran 77
170*8c3ff71bSJunchao Zhang    storage.  That is, the stored row and column indices can begin at
171*8c3ff71bSJunchao Zhang    either one (as in Fortran) or zero.  See the users' manual for details.
172*8c3ff71bSJunchao Zhang 
173*8c3ff71bSJunchao Zhang    Specify the preallocated storage with either nz or nnz (not both).
174*8c3ff71bSJunchao Zhang    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
175*8c3ff71bSJunchao Zhang    allocation.  For large problems you MUST preallocate memory or you
176*8c3ff71bSJunchao Zhang    will get TERRIBLE performance, see the users' manual chapter on matrices.
177*8c3ff71bSJunchao Zhang 
178*8c3ff71bSJunchao Zhang    By default, this format uses inodes (identical nodes) when possible, to
179*8c3ff71bSJunchao Zhang    improve numerical efficiency of matrix-vector products and solves. We
180*8c3ff71bSJunchao Zhang    search for consecutive rows with the same nonzero structure, thereby
181*8c3ff71bSJunchao Zhang    reusing matrix information to achieve increased efficiency.
182*8c3ff71bSJunchao Zhang 
183*8c3ff71bSJunchao Zhang    Level: intermediate
184*8c3ff71bSJunchao Zhang 
185*8c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJKOKKOS, MATAIJKokkos
186*8c3ff71bSJunchao Zhang @*/
187*8c3ff71bSJunchao Zhang PetscErrorCode  MatCreateAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
188*8c3ff71bSJunchao Zhang {
189*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
190*8c3ff71bSJunchao Zhang   PetscMPIInt    size;
191*8c3ff71bSJunchao Zhang 
192*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
193*8c3ff71bSJunchao Zhang   ierr = MatCreate(comm,A);CHKERRQ(ierr);
194*8c3ff71bSJunchao Zhang   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
195*8c3ff71bSJunchao Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196*8c3ff71bSJunchao Zhang   if (size > 1) {
197*8c3ff71bSJunchao Zhang     ierr = MatSetType(*A,MATMPIAIJKOKKOS);CHKERRQ(ierr);
198*8c3ff71bSJunchao Zhang     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
199*8c3ff71bSJunchao Zhang   } else {
200*8c3ff71bSJunchao Zhang     ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
201*8c3ff71bSJunchao Zhang     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
202*8c3ff71bSJunchao Zhang   }
203*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
204*8c3ff71bSJunchao Zhang }
205*8c3ff71bSJunchao Zhang 
206