xref: /petsc/src/mat/impls/aij/seq/kokkos/aijkok.kokkos.cxx (revision da9060c4ebf05b549ae65df4db64c7bee7b2b8a9)
1 #include "petsc/private/petscimpl.h"
2 #include <petscsystypes.h>
3 #include <petscerror.h>
4 #include <petscvec.hpp>
5 
6 #include <Kokkos_Core.hpp>
7 #include <KokkosSparse_CrsMatrix.hpp>
8 #include <KokkosSparse_spmv.hpp>
9 #include <KokkosSparse_spmv.hpp>
10 #include <../src/mat/impls/aij/seq/aij.h>
11 
12 #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp>
13 
14 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */
15 
16 static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode)
17 {
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
22   A->offloadmask = PETSC_OFFLOAD_CPU;
23   /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */
24   PetscFunctionReturn(0);
25 }
26 
27 static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A)
28 {
29   Mat_SeqAIJ                *aijseq = (Mat_SeqAIJ*)A->data;
30   Mat_SeqAIJKokkos          *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
31   PetscInt                  nrows   = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz;
32 
33   PetscFunctionBegin;
34   /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */
35   if (!aijkok || aijkok->nonzerostate != A->nonzerostate) {
36     delete aijkok;
37     aijkok               = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a);
38     aijkok->nonzerostate = A->nonzerostate;
39     A->spptr             = aijkok;
40   } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */
41     Kokkos::deep_copy(aijkok->a_d,aijkok->a_h);
42   }
43   A->offloadmask = PETSC_OFFLOAD_BOTH;
44   PetscFunctionReturn(0);
45 }
46 
47 /* y = A x */
48 static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
49 {
50   PetscErrorCode                   ierr;
51   Mat_SeqAIJKokkos                 *aijkok;
52   ConstPetscScalarViewDevice_t     xv;
53   PetscScalarViewDevice_t          yv;
54 
55   PetscFunctionBegin;
56   ierr   = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
57   ierr   = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
58   ierr   = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
59   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
60   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */
61   ierr   = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
62   ierr   = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 /* y = A^T x */
67 static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
68 {
69   PetscErrorCode                   ierr;
70   Mat_SeqAIJKokkos                 *aijkok;
71   ConstPetscScalarViewDevice_t     xv;
72   PetscScalarViewDevice_t          yv;
73 
74   PetscFunctionBegin;
75   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
76   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
77   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
78   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
79   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */
80   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
81   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 
85 /* y = A^H x */
86 static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy)
87 {
88   PetscErrorCode                   ierr;
89   Mat_SeqAIJKokkos                 *aijkok;
90   ConstPetscScalarViewDevice_t     xv;
91   PetscScalarViewDevice_t          yv;
92 
93   PetscFunctionBegin;
94   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
95   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
96   ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr);
97   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
98   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */
99   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
100   ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 /* z = A x + y */
105 static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz)
106 {
107   PetscErrorCode                   ierr;
108   Mat_SeqAIJKokkos                 *aijkok;
109   ConstPetscScalarViewDevice_t     xv,yv;
110   PetscScalarViewDevice_t          zv;
111 
112   PetscFunctionBegin;
113   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
114   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
115   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
116   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
117   if (zz != yy) Kokkos::deep_copy(zv,yv);
118   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
119   KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */
120   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
121   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
122   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 /* z = A^T x + y */
127 static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
128 {
129   PetscErrorCode                   ierr;
130   Mat_SeqAIJKokkos                 *aijkok;
131   ConstPetscScalarViewDevice_t     xv,yv;
132   PetscScalarViewDevice_t          zv;
133 
134   PetscFunctionBegin;
135   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
136   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
137   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
138   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
139   if (zz != yy) Kokkos::deep_copy(zv,yv);
140   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
141   KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */
142   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
143   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
144   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 /* z = A^H x + y */
149 static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz)
150 {
151   PetscErrorCode                   ierr;
152   Mat_SeqAIJKokkos                 *aijkok;
153   ConstPetscScalarViewDevice_t     xv,yv;
154   PetscScalarViewDevice_t          zv;
155 
156   PetscFunctionBegin;
157   ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr);
158   ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr);
159   ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr);
160   ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr);
161   if (zz != yy) Kokkos::deep_copy(zv,yv);
162   aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
163   KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */
164   ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr);
165   ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr);
166   ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
171 {
172   PetscErrorCode   ierr;
173   Mat              B;
174 
175   PetscFunctionBegin;
176   if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */
177     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
178   } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */
179     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
180   }
181 
182   B    = *newmat;
183   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
184   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
185   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
186   ierr = MatSetOps_SeqAIJKokkos(A);CHKERRQ(ierr);
187 
188   B->offloadmask = PETSC_OFFLOAD_CPU;
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B)
193 {
194   PetscErrorCode ierr;
195 
196   PetscFunctionBegin;
197   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
198   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr);
199   PetscFunctionReturn(0);
200 }
201 
202 static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A)
203 {
204   PetscErrorCode        ierr;
205   Mat_SeqAIJKokkos      *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr);
206 
207   PetscFunctionBegin;
208   delete aijkok;
209   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A)
214 {
215   PetscErrorCode ierr;
216 
217   PetscFunctionBegin;
218   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
219   ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr);
220   ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
221   PetscFunctionReturn(0);
222 }
223 
224 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A)
225 {
226   PetscFunctionBegin;
227   A->ops->assemblyend               = MatAssemblyEnd_SeqAIJKokkos;
228   A->ops->destroy                   = MatDestroy_SeqAIJKokkos;
229   A->ops->duplicate                 = MatDuplicate_SeqAIJKokkos;
230 
231   A->ops->mult                      = MatMult_SeqAIJKokkos;
232   A->ops->multadd                   = MatMultAdd_SeqAIJKokkos;
233   A->ops->multtranspose             = MatMultTranspose_SeqAIJKokkos;
234   A->ops->multtransposeadd          = MatMultTransposeAdd_SeqAIJKokkos;
235   A->ops->multhermitiantranspose    = MatMultHermitianTranspose_SeqAIJKokkos;
236   A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos;
237   PetscFunctionReturn(0);
238 }
239 
240 /* --------------------------------------------------------------------------------*/
241 /*C
242    MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
243    (the default parallel PETSc format). This matrix will ultimately be handled by
244    Kokkos for calculations. For good matrix
245    assembly performance the user should preallocate the matrix storage by setting
246    the parameter nz (or the array nnz).  By setting these parameters accurately,
247    performance during matrix assembly can be increased by more than a factor of 50.
248 
249    Collective
250 
251    Input Parameters:
252 +  comm - MPI communicator, set to PETSC_COMM_SELF
253 .  m - number of rows
254 .  n - number of columns
255 .  nz - number of nonzeros per row (same for all rows)
256 -  nnz - array containing the number of nonzeros in the various rows
257          (possibly different for each row) or NULL
258 
259    Output Parameter:
260 .  A - the matrix
261 
262    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
263    MatXXXXSetPreallocation() paradgm instead of this routine directly.
264    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
265 
266    Notes:
267    If nnz is given then nz is ignored
268 
269    The AIJ format (also called the Yale sparse matrix format or
270    compressed row storage), is fully compatible with standard Fortran 77
271    storage.  That is, the stored row and column indices can begin at
272    either one (as in Fortran) or zero.  See the users' manual for details.
273 
274    Specify the preallocated storage with either nz or nnz (not both).
275    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
276    allocation.  For large problems you MUST preallocate memory or you
277    will get TERRIBLE performance, see the users' manual chapter on matrices.
278 
279    By default, this format uses inodes (identical nodes) when possible, to
280    improve numerical efficiency of matrix-vector products and solves. We
281    search for consecutive rows with the same nonzero structure, thereby
282    reusing matrix information to achieve increased efficiency.
283 
284    Level: intermediate
285 
286 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS
287 @*/
288 PetscErrorCode  MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
289 {
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
294   ierr = MatCreate(comm,A);CHKERRQ(ierr);
295   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
296   ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
297   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300