xref: /petsc/src/mat/impls/aij/seq/kokkos/aijkok.kokkos.cxx (revision 6cb2790cfefec77db1a84ba0cc9dac4bbea3e3ea)
1 #include "petsc/private/petscimpl.h"
2 #include <petscsystypes.h>
3 #include <petscerror.h>
4 #include <petscveckokkos.hpp>
5 
6 #include <Kokkos_Core.hpp>
7 #include <KokkosSparse_CrsMatrix.hpp>
8 #include <KokkosSparse_spmv.hpp>
9 #include <KokkosSparse_spmv.hpp>
10 #include <../src/mat/impls/aij/seq/aij.h>
11 
12 #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp>
13 #include <petscmat.h>
14 
15 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */
16 
17 static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode)
18 {
19   PetscErrorCode    ierr;
20   Mat_SeqAIJKokkos  *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
21 
22   PetscFunctionBegin;
23   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
24   if (aijkok && aijkok->device_mat_d.data()) {
25     A->offloadmask = PETSC_OFFLOAD_GPU; // in GPU mode, no going back. MatSetValues checks this
26   }
27   /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */
28   PetscFunctionReturn(0);
29 }
30 
31 static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A)
32 {
33   Mat_SeqAIJ                *aijseq = (Mat_SeqAIJ*)A->data;
34   Mat_SeqAIJKokkos          *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
35   PetscInt                  nrows   = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz;
36 
37   PetscFunctionBegin;
38   /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */
39   if (!aijkok || aijkok->nonzerostate != A->nonzerostate) {
40     delete aijkok;
41     aijkok               = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a);
42     aijkok->nonzerostate = A->nonzerostate;
43     A->spptr             = aijkok;
44   } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */
45     Kokkos::deep_copy(aijkok->a_d,aijkok->a_h);
46   }
47   A->offloadmask = PETSC_OFFLOAD_BOTH;
48   PetscFunctionReturn(0);
49 }
50 
51 // MatSeqAIJKokkosSetDeviceMat takes a PetscSplitCSRDataStructure with device data and copies it to the device. Note, "deep_copy" here is really a shallow copy
52 PETSC_EXTERN PetscErrorCode MatSeqAIJKokkosSetDeviceMat(Mat A, PetscSplitCSRDataStructure *h_mat)
53 {
54   Mat_SeqAIJKokkos *aijkok;
55   Kokkos::View<PetscSplitCSRDataStructure, Kokkos::HostSpace> h_mat_k(h_mat);
56 
57   PetscFunctionBegin;
58   // ierr    = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
59   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
60   if (!aijkok) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"no Mat_SeqAIJKokkos");
61   aijkok->device_mat_d = create_mirror(DeviceMemorySpace(),h_mat_k);
62   Kokkos::deep_copy (aijkok->device_mat_d, h_mat_k);
63   PetscFunctionReturn(0);
64 }
65 
66 // MatSeqAIJKokkosGetDeviceMat gets the device if it is here, otherwise it creates a place for it and returns NULL
67 PETSC_EXTERN PetscErrorCode MatSeqAIJKokkosGetDeviceMat(Mat A, PetscSplitCSRDataStructure **d_mat)
68 {
69   Mat_SeqAIJKokkos *aijkok;
70 
71   PetscFunctionBegin;
72   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
73   if (aijkok && aijkok->device_mat_d.data()) {
74     *d_mat = aijkok->device_mat_d.data();
75   } else {
76     PetscErrorCode   ierr;
77     ierr    = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); // create aijkok (we are making d_mat now so make a place for it)
78     *d_mat  = NULL;
79   }
80   PetscFunctionReturn(0);
81 }
82 
83 /* y = A x */
84 static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
85 {
86   PetscErrorCode                   ierr;
87   Mat_SeqAIJKokkos                 *aijkok;
88   ConstPetscScalarViewDevice_t     xv;
89   PetscScalarViewDevice_t          yv;
90 
91   PetscFunctionBegin;
92   ierr   = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
93   ierr   = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
94   ierr   = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
95   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
96   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */
97   ierr   = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
98   ierr   = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
99   /* 2.0*aijkok->csr.nnz()-aijkok->csr.numRows() seems more accurate here but assumes there are no zero-rows. So a little sloopy here. */
100   ierr   = WaitForKokkos();CHKERRQ(ierr);
101   ierr   = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 /* y = A^T x */
106 static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
107 {
108   PetscErrorCode                   ierr;
109   Mat_SeqAIJKokkos                 *aijkok;
110   ConstPetscScalarViewDevice_t     xv;
111   PetscScalarViewDevice_t          yv;
112 
113   PetscFunctionBegin;
114   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
115   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
116   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
117   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
118   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */
119   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
120   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
121   ierr = WaitForKokkos();CHKERRQ(ierr);
122   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 /* y = A^H x */
127 static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
128 {
129   PetscErrorCode                   ierr;
130   Mat_SeqAIJKokkos                 *aijkok;
131   ConstPetscScalarViewDevice_t     xv;
132   PetscScalarViewDevice_t          yv;
133 
134   PetscFunctionBegin;
135   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
136   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
137   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
138   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
139   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */
140   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
141   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
142   ierr = WaitForKokkos();CHKERRQ(ierr);
143   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 /* z = A x + y */
148 static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz)
149 {
150   PetscErrorCode                   ierr;
151   Mat_SeqAIJKokkos                 *aijkok;
152   ConstPetscScalarViewDevice_t     xv,yv;
153   PetscScalarViewDevice_t          zv;
154 
155   PetscFunctionBegin;
156   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
157   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
158   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
159   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
160   if (zz != yy) Kokkos::deep_copy(zv,yv);
161   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
162   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */
163   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
164   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
165   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
166   ierr = WaitForKokkos();CHKERRQ(ierr);
167   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 /* z = A^T x + y */
172 static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
173 {
174   PetscErrorCode                   ierr;
175   Mat_SeqAIJKokkos                 *aijkok;
176   ConstPetscScalarViewDevice_t     xv,yv;
177   PetscScalarViewDevice_t          zv;
178 
179   PetscFunctionBegin;
180   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
181   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
182   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
183   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
184   if (zz != yy) Kokkos::deep_copy(zv,yv);
185   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
186   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */
187   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
188   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
189   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
190   ierr = WaitForKokkos();CHKERRQ(ierr);
191   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 /* z = A^H x + y */
196 static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
197 {
198   PetscErrorCode                   ierr;
199   Mat_SeqAIJKokkos                 *aijkok;
200   ConstPetscScalarViewDevice_t     xv,yv;
201   PetscScalarViewDevice_t          zv;
202 
203   PetscFunctionBegin;
204   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
205   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
206   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
207   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
208   if (zz != yy) Kokkos::deep_copy(zv,yv);
209   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
210   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */
211   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
212   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
213   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
214   ierr = WaitForKokkos();CHKERRQ(ierr);
215   ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
220 {
221   PetscErrorCode   ierr;
222   Mat              B;
223   Mat_SeqAIJ       *aij;
224 
225   PetscFunctionBegin;
226   if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */
227     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
228   } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */
229     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
230   }
231 
232   B    = *newmat;
233   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
234   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
235   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
236   ierr = MatSetOps_SeqAIJKokkos(B);CHKERRQ(ierr);
237   /* TODO: see ViennaCL and CUSPARSE once we have a BindToCPU? */
238   aij  = (Mat_SeqAIJ*)B->data;
239   aij->inode.use = PETSC_FALSE;
240 
241   B->offloadmask = PETSC_OFFLOAD_CPU;
242   PetscFunctionReturn(0);
243 }
244 
245 static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B)
246 {
247   PetscErrorCode ierr;
248 
249   PetscFunctionBegin;
250   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
251   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A)
256 {
257   PetscErrorCode        ierr;
258   Mat_SeqAIJKokkos      *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
259 
260   PetscFunctionBegin;
261   if (aijkok && aijkok->device_mat_d.data()) {
262     delete aijkok->colmap_d;
263     delete aijkok->i_uncompressed_d;
264   }
265   delete aijkok;
266   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A)
271 {
272   PetscErrorCode ierr;
273 
274   PetscFunctionBegin;
275   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
276   ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr);
277   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode MatSetValues_SeqAIJKokkos(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
282 {
283   PetscErrorCode    ierr;
284   Mat_SeqAIJKokkos  *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
285   PetscFunctionBegin;
286   if (aijkok && aijkok->device_mat_d.data()) {
287     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Mixing GPU and non-GPU assembly not supported");
288   }
289   ierr = MatSetValues_SeqAIJ(A,m,im,n,in,v,is);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 static PetscErrorCode MatZeroEntries_SeqAIJKokkos(Mat A)
294 {
295   PetscErrorCode             ierr;
296   PetscBool                  both = PETSC_FALSE;
297   Mat_SeqAIJKokkos           *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
298   Mat_SeqAIJ                 *a = (Mat_SeqAIJ*)A->data;
299 
300   PetscFunctionBegin;
301   if (aijkok && aijkok->a_d.data()) {
302     Kokkos::parallel_for (aijkok->a_d.size(), KOKKOS_LAMBDA (int i) { aijkok->a_d(i) = 0; });
303     both = PETSC_TRUE;
304   }
305   ierr = PetscArrayzero(a->a,a->i[A->rmap->n]);CHKERRQ(ierr);
306   ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr);
307   if (both) A->offloadmask = PETSC_OFFLOAD_BOTH;
308   else A->offloadmask = PETSC_OFFLOAD_CPU;
309 
310   PetscFunctionReturn(0);
311 }
312 
313 static PetscErrorCode MatAXPY_SeqAIJKokkos(Mat Y,PetscScalar a,Mat X,MatStructure str) // put axpy in aijcusparse, etc.
314 {
315   PetscErrorCode ierr;
316   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data,*y = (Mat_SeqAIJ*)Y->data;
317   PetscBool      flgx,flgy;
318 
319   PetscFunctionBegin;
320   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
321   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
322   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
323   ierr = PetscObjectTypeCompare((PetscObject)Y,MATSEQAIJKOKKOS,&flgy);CHKERRQ(ierr);
324   ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQAIJKOKKOS,&flgx);CHKERRQ(ierr);
325   if (!flgx || !flgy) {
326     ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr);
327     PetscFunctionReturn(0);
328   }
329   if (str == DIFFERENT_NONZERO_PATTERN) {
330     if (x->nz == y->nz) {
331       PetscBool e;
332       ierr = PetscArraycmp(x->i,y->i,Y->rmap->n+1,&e);CHKERRQ(ierr);
333       if (e) {
334         ierr = PetscArraycmp(x->j,y->j,y->nz,&e);CHKERRQ(ierr);
335         if (e) {
336           str = SAME_NONZERO_PATTERN;
337         }
338       }
339     }
340   }
341   if (str != SAME_NONZERO_PATTERN) {
342     ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr);
343     PetscFunctionReturn(0);
344   } else {
345     if (Y->offloadmask==PETSC_OFFLOAD_CPU && X->offloadmask==PETSC_OFFLOAD_CPU) {
346       ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr);
347       PetscFunctionReturn(0);
348     } else if ((Y->offloadmask==PETSC_OFFLOAD_GPU || Y->offloadmask==PETSC_OFFLOAD_BOTH) && X->offloadmask == PETSC_OFFLOAD_CPU) {
349       ierr    = MatSeqAIJKokkosSyncDevice(X);CHKERRQ(ierr);
350     } else if ((X->offloadmask==PETSC_OFFLOAD_GPU || X->offloadmask==PETSC_OFFLOAD_BOTH) && Y->offloadmask == PETSC_OFFLOAD_CPU) {
351       ierr    = MatSeqAIJKokkosSyncDevice(Y);CHKERRQ(ierr); /* promote Y */
352     }
353     {
354       Mat_SeqAIJKokkos *aijkokY = static_cast<Mat_SeqAIJKokkos*>(Y->spptr);
355       Mat_SeqAIJKokkos *aijkokX = static_cast<Mat_SeqAIJKokkos*>(X->spptr);
356       if (aijkokY && aijkokX && aijkokY->a_d.data() && aijkokX->a_d.data()) {
357         Kokkos::parallel_for (aijkokY->a_d.size(), KOKKOS_LAMBDA (int i) { aijkokY->a_d(i) += a*aijkokX->a_d(i); });
358         Kokkos::deep_copy(aijkokY->a_h,aijkokY->a_d); // we could not copy and keep GPU
359         Y->offloadmask = PETSC_OFFLOAD_BOTH;
360         ierr = PetscLogGpuFlops(2.0*aijkokY->a_d.size());CHKERRQ(ierr);
361       } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"no Mat_SeqAIJKokkos ???");
362     }
363   }
364   PetscFunctionReturn(0);
365 }
366 
367 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A)
368 {
369   PetscFunctionBegin;
370   A->ops->setvalues                 = MatSetValues_SeqAIJKokkos; /* protect with DEBUG, but MatSeqAIJSetTotalPreallocation defeats this ??? */
371   A->ops->assemblyend               = MatAssemblyEnd_SeqAIJKokkos;
372   A->ops->destroy                   = MatDestroy_SeqAIJKokkos;
373   A->ops->duplicate                 = MatDuplicate_SeqAIJKokkos;
374 
375   A->ops->axpy                      = MatAXPY_SeqAIJKokkos;
376   A->ops->zeroentries               = MatZeroEntries_SeqAIJKokkos;
377   A->ops->mult                      = MatMult_SeqAIJKokkos;
378   A->ops->multadd                   = MatMultAdd_SeqAIJKokkos;
379   A->ops->multtranspose             = MatMultTranspose_SeqAIJKokkos;
380   A->ops->multtransposeadd          = MatMultTransposeAdd_SeqAIJKokkos;
381   A->ops->multhermitiantranspose    = MatMultHermitianTranspose_SeqAIJKokkos;
382   A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos;
383   PetscFunctionReturn(0);
384 }
385 
386 /* --------------------------------------------------------------------------------*/
387 /*C
388    MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
389    (the default parallel PETSc format). This matrix will ultimately be handled by
390    Kokkos for calculations. For good matrix
391    assembly performance the user should preallocate the matrix storage by setting
392    the parameter nz (or the array nnz).  By setting these parameters accurately,
393    performance during matrix assembly can be increased by more than a factor of 50.
394 
395    Collective
396 
397    Input Parameters:
398 +  comm - MPI communicator, set to PETSC_COMM_SELF
399 .  m - number of rows
400 .  n - number of columns
401 .  nz - number of nonzeros per row (same for all rows)
402 -  nnz - array containing the number of nonzeros in the various rows
403          (possibly different for each row) or NULL
404 
405    Output Parameter:
406 .  A - the matrix
407 
408    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
409    MatXXXXSetPreallocation() paradgm instead of this routine directly.
410    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
411 
412    Notes:
413    If nnz is given then nz is ignored
414 
415    The AIJ format (also called the Yale sparse matrix format or
416    compressed row storage), is fully compatible with standard Fortran 77
417    storage.  That is, the stored row and column indices can begin at
418    either one (as in Fortran) or zero.  See the users' manual for details.
419 
420    Specify the preallocated storage with either nz or nnz (not both).
421    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
422    allocation.  For large problems you MUST preallocate memory or you
423    will get TERRIBLE performance, see the users' manual chapter on matrices.
424 
425    By default, this format uses inodes (identical nodes) when possible, to
426    improve numerical efficiency of matrix-vector products and solves. We
427    search for consecutive rows with the same nonzero structure, thereby
428    reusing matrix information to achieve increased efficiency.
429 
430    Level: intermediate
431 
432 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS
433 @*/
434 PetscErrorCode  MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
435 {
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
440   ierr = MatCreate(comm,A);CHKERRQ(ierr);
441   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
442   ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
443   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446