xref: /petsc/src/mat/impls/aij/seq/kokkos/aijkok.kokkos.cxx (revision 8c3ff71bb91e1c43c25fcf6109e2039cde4e7447)
1*8c3ff71bSJunchao Zhang #include "petsc/private/petscimpl.h"
2*8c3ff71bSJunchao Zhang #include <petscsystypes.h>
3*8c3ff71bSJunchao Zhang #include <petscerror.h>
4*8c3ff71bSJunchao Zhang #include <petscvec.hpp>
5*8c3ff71bSJunchao Zhang 
6*8c3ff71bSJunchao Zhang #include <Kokkos_Core.hpp>
7*8c3ff71bSJunchao Zhang #include <KokkosSparse_CrsMatrix.hpp>
8*8c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp>
9*8c3ff71bSJunchao Zhang #include <KokkosSparse_spmv.hpp>
10*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/aij.h>
11*8c3ff71bSJunchao Zhang 
12*8c3ff71bSJunchao Zhang #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp>
13*8c3ff71bSJunchao Zhang 
14*8c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */
15*8c3ff71bSJunchao Zhang 
16*8c3ff71bSJunchao Zhang static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode)
17*8c3ff71bSJunchao Zhang {
18*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
19*8c3ff71bSJunchao Zhang 
20*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
21*8c3ff71bSJunchao Zhang   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
22*8c3ff71bSJunchao Zhang   A->offloadmask = PETSC_OFFLOAD_CPU;
23*8c3ff71bSJunchao Zhang   /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */
24*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
25*8c3ff71bSJunchao Zhang }
26*8c3ff71bSJunchao Zhang 
27*8c3ff71bSJunchao Zhang static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A)
28*8c3ff71bSJunchao Zhang {
29*8c3ff71bSJunchao Zhang   Mat_SeqAIJ                *aijseq = (Mat_SeqAIJ*)A->data;
30*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos          *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
31*8c3ff71bSJunchao Zhang   PetscInt                  nrows   = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz;
32*8c3ff71bSJunchao Zhang 
33*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
34*8c3ff71bSJunchao Zhang   /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */
35*8c3ff71bSJunchao Zhang   if (!aijkok || aijkok->nonzerostate != A->nonzerostate) {
36*8c3ff71bSJunchao Zhang     delete aijkok;
37*8c3ff71bSJunchao Zhang     aijkok               = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a);
38*8c3ff71bSJunchao Zhang     aijkok->nonzerostate = A->nonzerostate;
39*8c3ff71bSJunchao Zhang     A->spptr             = aijkok;
40*8c3ff71bSJunchao Zhang   } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */
41*8c3ff71bSJunchao Zhang     Kokkos::deep_copy(aijkok->a_d,aijkok->a_h);
42*8c3ff71bSJunchao Zhang   }
43*8c3ff71bSJunchao Zhang   A->offloadmask = PETSC_OFFLOAD_BOTH;
44*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
45*8c3ff71bSJunchao Zhang }
46*8c3ff71bSJunchao Zhang 
47*8c3ff71bSJunchao Zhang /* y = A x */
48*8c3ff71bSJunchao Zhang static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
49*8c3ff71bSJunchao Zhang {
50*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
51*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
52*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
53*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
54*8c3ff71bSJunchao Zhang 
55*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
56*8c3ff71bSJunchao Zhang   ierr   = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
57*8c3ff71bSJunchao Zhang   ierr   = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
58*8c3ff71bSJunchao Zhang   ierr   = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
59*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
60*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */
61*8c3ff71bSJunchao Zhang   ierr   = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
62*8c3ff71bSJunchao Zhang   ierr   = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
63*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
64*8c3ff71bSJunchao Zhang }
65*8c3ff71bSJunchao Zhang 
66*8c3ff71bSJunchao Zhang /* y = A^T x */
67*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
68*8c3ff71bSJunchao Zhang {
69*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
70*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
71*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
72*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
73*8c3ff71bSJunchao Zhang 
74*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
75*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
76*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
77*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
78*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
79*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */
80*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
81*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
82*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
83*8c3ff71bSJunchao Zhang }
84*8c3ff71bSJunchao Zhang 
85*8c3ff71bSJunchao Zhang /* y = A^H x */
86*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
87*8c3ff71bSJunchao Zhang {
88*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
89*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
90*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv;
91*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          yv;
92*8c3ff71bSJunchao Zhang 
93*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
94*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
95*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
96*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
97*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
98*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */
99*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
100*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
101*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
102*8c3ff71bSJunchao Zhang }
103*8c3ff71bSJunchao Zhang 
104*8c3ff71bSJunchao Zhang /* z = A x + y */
105*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz)
106*8c3ff71bSJunchao Zhang {
107*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
108*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
109*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
110*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
111*8c3ff71bSJunchao Zhang 
112*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
113*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
114*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
115*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
116*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
117*8c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
118*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
119*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */
120*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
121*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
122*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
123*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
124*8c3ff71bSJunchao Zhang }
125*8c3ff71bSJunchao Zhang 
126*8c3ff71bSJunchao Zhang /* z = A^T x + y */
127*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
128*8c3ff71bSJunchao Zhang {
129*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
130*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
131*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
132*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
133*8c3ff71bSJunchao Zhang 
134*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
135*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
136*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
137*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
138*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
139*8c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
140*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
141*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */
142*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
143*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
144*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
145*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
146*8c3ff71bSJunchao Zhang }
147*8c3ff71bSJunchao Zhang 
148*8c3ff71bSJunchao Zhang /* z = A^H x + y */
149*8c3ff71bSJunchao Zhang static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
150*8c3ff71bSJunchao Zhang {
151*8c3ff71bSJunchao Zhang   PetscErrorCode                   ierr;
152*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos                 *aijkok;
153*8c3ff71bSJunchao Zhang   ConstPetscScalarViewDevice_t     xv,yv;
154*8c3ff71bSJunchao Zhang   PetscScalarViewDevice_t          zv;
155*8c3ff71bSJunchao Zhang 
156*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
157*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
158*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
159*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
160*8c3ff71bSJunchao Zhang   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
161*8c3ff71bSJunchao Zhang   if (zz != yy) Kokkos::deep_copy(zv,yv);
162*8c3ff71bSJunchao Zhang   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
163*8c3ff71bSJunchao Zhang   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */
164*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
165*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
166*8c3ff71bSJunchao Zhang   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
167*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
168*8c3ff71bSJunchao Zhang }
169*8c3ff71bSJunchao Zhang 
170*8c3ff71bSJunchao Zhang PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
171*8c3ff71bSJunchao Zhang {
172*8c3ff71bSJunchao Zhang   PetscErrorCode   ierr;
173*8c3ff71bSJunchao Zhang   Mat              B;
174*8c3ff71bSJunchao Zhang 
175*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
176*8c3ff71bSJunchao Zhang   if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */
177*8c3ff71bSJunchao Zhang     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
178*8c3ff71bSJunchao Zhang   } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */
179*8c3ff71bSJunchao Zhang     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
180*8c3ff71bSJunchao Zhang   }
181*8c3ff71bSJunchao Zhang 
182*8c3ff71bSJunchao Zhang   B    = *newmat;
183*8c3ff71bSJunchao Zhang   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
184*8c3ff71bSJunchao Zhang   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
185*8c3ff71bSJunchao Zhang   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
186*8c3ff71bSJunchao Zhang   ierr = MatSetOps_SeqAIJKokkos(A);CHKERRQ(ierr);
187*8c3ff71bSJunchao Zhang 
188*8c3ff71bSJunchao Zhang   B->offloadmask = PETSC_OFFLOAD_CPU;
189*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
190*8c3ff71bSJunchao Zhang }
191*8c3ff71bSJunchao Zhang 
192*8c3ff71bSJunchao Zhang static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B)
193*8c3ff71bSJunchao Zhang {
194*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
195*8c3ff71bSJunchao Zhang 
196*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
197*8c3ff71bSJunchao Zhang   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
198*8c3ff71bSJunchao Zhang   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr);
199*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
200*8c3ff71bSJunchao Zhang }
201*8c3ff71bSJunchao Zhang 
202*8c3ff71bSJunchao Zhang static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A)
203*8c3ff71bSJunchao Zhang {
204*8c3ff71bSJunchao Zhang   PetscErrorCode        ierr;
205*8c3ff71bSJunchao Zhang   Mat_SeqAIJKokkos      *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
206*8c3ff71bSJunchao Zhang 
207*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
208*8c3ff71bSJunchao Zhang   delete aijkok;
209*8c3ff71bSJunchao Zhang   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
210*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
211*8c3ff71bSJunchao Zhang }
212*8c3ff71bSJunchao Zhang 
213*8c3ff71bSJunchao Zhang PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A)
214*8c3ff71bSJunchao Zhang {
215*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
216*8c3ff71bSJunchao Zhang 
217*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
218*8c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
219*8c3ff71bSJunchao Zhang   ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr);
220*8c3ff71bSJunchao Zhang   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
221*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
222*8c3ff71bSJunchao Zhang }
223*8c3ff71bSJunchao Zhang 
224*8c3ff71bSJunchao Zhang static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A)
225*8c3ff71bSJunchao Zhang {
226*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
227*8c3ff71bSJunchao Zhang   A->ops->assemblyend               = MatAssemblyEnd_SeqAIJKokkos;
228*8c3ff71bSJunchao Zhang   A->ops->destroy                   = MatDestroy_SeqAIJKokkos;
229*8c3ff71bSJunchao Zhang   A->ops->duplicate                 = MatDuplicate_SeqAIJKokkos;
230*8c3ff71bSJunchao Zhang 
231*8c3ff71bSJunchao Zhang   A->ops->mult                      = MatMult_SeqAIJKokkos;
232*8c3ff71bSJunchao Zhang   A->ops->multadd                   = MatMultAdd_SeqAIJKokkos;
233*8c3ff71bSJunchao Zhang   A->ops->multtranspose             = MatMultTranspose_SeqAIJKokkos;
234*8c3ff71bSJunchao Zhang   A->ops->multtransposeadd          = MatMultTransposeAdd_SeqAIJKokkos;
235*8c3ff71bSJunchao Zhang   A->ops->multhermitiantranspose    = MatMultHermitianTranspose_SeqAIJKokkos;
236*8c3ff71bSJunchao Zhang   A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos;
237*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
238*8c3ff71bSJunchao Zhang }
239*8c3ff71bSJunchao Zhang 
240*8c3ff71bSJunchao Zhang /* --------------------------------------------------------------------------------*/
241*8c3ff71bSJunchao Zhang /*C
242*8c3ff71bSJunchao Zhang    MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
243*8c3ff71bSJunchao Zhang    (the default parallel PETSc format). This matrix will ultimately be handled by
244*8c3ff71bSJunchao Zhang    Kokkos for calculations. For good matrix
245*8c3ff71bSJunchao Zhang    assembly performance the user should preallocate the matrix storage by setting
246*8c3ff71bSJunchao Zhang    the parameter nz (or the array nnz).  By setting these parameters accurately,
247*8c3ff71bSJunchao Zhang    performance during matrix assembly can be increased by more than a factor of 50.
248*8c3ff71bSJunchao Zhang 
249*8c3ff71bSJunchao Zhang    Collective
250*8c3ff71bSJunchao Zhang 
251*8c3ff71bSJunchao Zhang    Input Parameters:
252*8c3ff71bSJunchao Zhang +  comm - MPI communicator, set to PETSC_COMM_SELF
253*8c3ff71bSJunchao Zhang .  m - number of rows
254*8c3ff71bSJunchao Zhang .  n - number of columns
255*8c3ff71bSJunchao Zhang .  nz - number of nonzeros per row (same for all rows)
256*8c3ff71bSJunchao Zhang -  nnz - array containing the number of nonzeros in the various rows
257*8c3ff71bSJunchao Zhang          (possibly different for each row) or NULL
258*8c3ff71bSJunchao Zhang 
259*8c3ff71bSJunchao Zhang    Output Parameter:
260*8c3ff71bSJunchao Zhang .  A - the matrix
261*8c3ff71bSJunchao Zhang 
262*8c3ff71bSJunchao Zhang    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
263*8c3ff71bSJunchao Zhang    MatXXXXSetPreallocation() paradgm instead of this routine directly.
264*8c3ff71bSJunchao Zhang    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
265*8c3ff71bSJunchao Zhang 
266*8c3ff71bSJunchao Zhang    Notes:
267*8c3ff71bSJunchao Zhang    If nnz is given then nz is ignored
268*8c3ff71bSJunchao Zhang 
269*8c3ff71bSJunchao Zhang    The AIJ format (also called the Yale sparse matrix format or
270*8c3ff71bSJunchao Zhang    compressed row storage), is fully compatible with standard Fortran 77
271*8c3ff71bSJunchao Zhang    storage.  That is, the stored row and column indices can begin at
272*8c3ff71bSJunchao Zhang    either one (as in Fortran) or zero.  See the users' manual for details.
273*8c3ff71bSJunchao Zhang 
274*8c3ff71bSJunchao Zhang    Specify the preallocated storage with either nz or nnz (not both).
275*8c3ff71bSJunchao Zhang    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
276*8c3ff71bSJunchao Zhang    allocation.  For large problems you MUST preallocate memory or you
277*8c3ff71bSJunchao Zhang    will get TERRIBLE performance, see the users' manual chapter on matrices.
278*8c3ff71bSJunchao Zhang 
279*8c3ff71bSJunchao Zhang    By default, this format uses inodes (identical nodes) when possible, to
280*8c3ff71bSJunchao Zhang    improve numerical efficiency of matrix-vector products and solves. We
281*8c3ff71bSJunchao Zhang    search for consecutive rows with the same nonzero structure, thereby
282*8c3ff71bSJunchao Zhang    reusing matrix information to achieve increased efficiency.
283*8c3ff71bSJunchao Zhang 
284*8c3ff71bSJunchao Zhang    Level: intermediate
285*8c3ff71bSJunchao Zhang 
286*8c3ff71bSJunchao Zhang .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS
287*8c3ff71bSJunchao Zhang @*/
288*8c3ff71bSJunchao Zhang PetscErrorCode  MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
289*8c3ff71bSJunchao Zhang {
290*8c3ff71bSJunchao Zhang   PetscErrorCode ierr;
291*8c3ff71bSJunchao Zhang 
292*8c3ff71bSJunchao Zhang   PetscFunctionBegin;
293*8c3ff71bSJunchao Zhang   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
294*8c3ff71bSJunchao Zhang   ierr = MatCreate(comm,A);CHKERRQ(ierr);
295*8c3ff71bSJunchao Zhang   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
296*8c3ff71bSJunchao Zhang   ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
297*8c3ff71bSJunchao Zhang   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
298*8c3ff71bSJunchao Zhang   PetscFunctionReturn(0);
299*8c3ff71bSJunchao Zhang }
300