xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision bcd4bb4a4158aa96f212e9537e87b40407faf83e)
1 /*
2     Defines the basic matrix operations for the AIJ (compressed row)
3   matrix storage format.
4 */
5 
6 #include <petscconf.h>
7 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
9 #include <petscbt.h>
10 #include <../src/vec/vec/impls/dvecimpl.h>
11 #include <petsc/private/vecimpl.h>
12 
13 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
14 
15 #include <algorithm>
16 #include <vector>
17 #include <string>
18 
19 #include "viennacl/linalg/prod.hpp"
20 
21 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
22 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
23 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
24 
25 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26 {
27   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
28   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
29 
30   PetscFunctionBegin;
31   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
32     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
33       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
34 
35       try {
36         if (a->compressedrow.use) {
37           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
38 
39           // Since PetscInt is different from cl_uint, we have to convert:
40           viennacl::backend::mem_handle dummy;
41 
42           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
43           row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
44           for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_indices;
47           row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
51           col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
53 
54           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
55           PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
56         } else {
57           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
58 
59           // Since PetscInt is in general different from cl_uint, we have to convert:
60           viennacl::backend::mem_handle dummy;
61 
62           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
63           row_buffer.raw_resize(dummy, A->rmap->n + 1);
64           for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);
65 
66           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
67           col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
69 
70           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
71           PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
72         }
73         ViennaCLWaitForGPU();
74       } catch (std::exception const &ex) {
75         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
76       }
77 
78       // Create temporary vector for v += A*x:
79       if (viennaclstruct->tempvec) {
80         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
81           delete (ViennaCLVector *)viennaclstruct->tempvec;
82           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
83         } else {
84           viennaclstruct->tempvec->clear();
85         }
86       } else {
87         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
88       }
89 
90       A->offloadmask = PETSC_OFFLOAD_BOTH;
91 
92       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
93     }
94   }
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
99 {
100   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
101   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
102   PetscInt            m              = A->rmap->n;
103 
104   PetscFunctionBegin;
105   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
106   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
107     try {
108       PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
109       PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
110       a->nz           = Agpu->nnz();
111       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
112       A->preallocated = PETSC_TRUE;
113       if (a->free_a) PetscCall(PetscShmgetDeallocateArray((void **)a->a));
114       if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->j));
115       if (a->free_ij) PetscCall(PetscShmgetDeallocateArray((void **)a->i));
116       PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscScalar), (void **)&a->a));
117       PetscCall(PetscShmgetAllocateArray(a->nz, sizeof(PetscInt), (void **)&a->j));
118       PetscCall(PetscShmgetAllocateArray(m + 1, sizeof(PetscInt), (void **)&a->i));
119       a->free_a  = PETSC_TRUE;
120       a->free_ij = PETSC_TRUE;
121 
122       /* Setup row lengths */
123       PetscCall(PetscFree(a->imax));
124       PetscCall(PetscFree(a->ilen));
125       PetscCall(PetscMalloc1(m, &a->imax));
126       PetscCall(PetscMalloc1(m, &a->ilen));
127 
128       /* Copy data back from GPU */
129       viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
130       row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
131 
132       // copy row array
133       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
134       (a->i)[0] = row_buffer[0];
135       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
136         (a->i)[i + 1] = row_buffer[i + 1];
137         a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
138       }
139 
140       // copy column indices
141       viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
142       col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
143       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
144       for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];
145 
146       // copy nonzero entries directly to destination (no conversion required)
147       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
148 
149       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
150       ViennaCLWaitForGPU();
151     } catch (std::exception const &ex) {
152       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
153     }
154   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
155     PetscFunctionReturn(PETSC_SUCCESS);
156   } else {
157     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);
158 
159     PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
160     if (!Agpu) {
161       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
162     } else {
163       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
164     }
165   }
166   A->offloadmask = PETSC_OFFLOAD_BOTH;
167   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
168   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
169   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
174 {
175   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
176   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
177   const ViennaCLVector *xgpu           = NULL;
178   ViennaCLVector       *ygpu           = NULL;
179 
180   PetscFunctionBegin;
181   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
182   PetscCall(MatViennaCLCopyToGPU(A));
183   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
184     PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
185     PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
186     PetscCall(PetscLogGpuTimeBegin());
187     try {
188       if (a->compressedrow.use) {
189         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
190       } else {
191         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
192       }
193       ViennaCLWaitForGPU();
194     } catch (std::exception const &ex) {
195       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
196     }
197     PetscCall(PetscLogGpuTimeEnd());
198     PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
199     PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
200     PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
201   } else {
202     PetscCall(VecSet_SeqViennaCL(yy, 0));
203   }
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
207 static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
208 {
209   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
210   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
211   const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
212   ViennaCLVector       *zgpu = NULL;
213 
214   PetscFunctionBegin;
215   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
216   PetscCall(MatViennaCLCopyToGPU(A));
217   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
218     try {
219       PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
220       PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
221       PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
222       PetscCall(PetscLogGpuTimeBegin());
223       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
224       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
225       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
226       else *zgpu += *viennaclstruct->tempvec;
227       ViennaCLWaitForGPU();
228       PetscCall(PetscLogGpuTimeEnd());
229       PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
230       PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
231       PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
232 
233     } catch (std::exception const &ex) {
234       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
235     }
236     PetscCall(PetscLogGpuFlops(2.0 * a->nz));
237   } else {
238     PetscCall(VecCopy_SeqViennaCL(yy, zz));
239   }
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
244 {
245   PetscFunctionBegin;
246   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
247   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
248   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 /* --------------------------------------------------------------------------------*/
253 /*@C
254   MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
255   (the default parallel PETSc format).  This matrix will ultimately be pushed down
256   to GPUs and use the ViennaCL library for calculations.
257 
258   Collective
259 
260   Input Parameters:
261 + comm - MPI communicator, set to `PETSC_COMM_SELF`
262 . m    - number of rows
263 . n    - number of columns
264 . nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
265 - nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`
266 
267   Output Parameter:
268 . A - the matrix
269 
270    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
271    MatXXXXSetPreallocation() paradigm instead of this routine directly.
272    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
273 
274   Notes:
275   The AIJ format, also called
276   compressed row storage, is fully compatible with standard Fortran
277   storage.  That is, the stored row and column indices can begin at
278   either one (as in Fortran) or zero.
279 
280   Specify the preallocated storage with either `nz` or `nnz` (not both).
281   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
282   allocation.
283 
284   Level: intermediate
285 
286 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
287 @*/
288 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
289 {
290   PetscFunctionBegin;
291   PetscCall(MatCreate(comm, A));
292   PetscCall(MatSetSizes(*A, m, n, m, n));
293   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
294   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
295   PetscFunctionReturn(PETSC_SUCCESS);
296 }
297 
298 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
299 {
300   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
301 
302   PetscFunctionBegin;
303   try {
304     if (viennaclcontainer) {
305       delete viennaclcontainer->tempvec;
306       delete viennaclcontainer->mat;
307       delete viennaclcontainer->compressed_mat;
308       delete viennaclcontainer;
309     }
310     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
311   } catch (std::exception const &ex) {
312     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
313   }
314 
315   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
316   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
317   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
318 
319   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
320   A->spptr = 0;
321   PetscCall(MatDestroy_SeqAIJ(A));
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
324 
325 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
326 {
327   PetscFunctionBegin;
328   PetscCall(MatCreate_SeqAIJ(B));
329   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
334 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
335 {
336   Mat C;
337 
338   PetscFunctionBegin;
339   PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
340   C = *B;
341 
342   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
343   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
344 
345   C->spptr                                         = new Mat_SeqAIJViennaCL();
346   ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
347   ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
348   ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;
349 
350   PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));
351 
352   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
353 
354   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
355   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
360 {
361   PetscFunctionBegin;
362   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
363   *array = ((Mat_SeqAIJ *)A->data)->a;
364   PetscFunctionReturn(PETSC_SUCCESS);
365 }
366 
367 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
368 {
369   PetscFunctionBegin;
370   A->offloadmask = PETSC_OFFLOAD_CPU;
371   *array         = NULL;
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
376 {
377   PetscFunctionBegin;
378   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
379   *array = ((Mat_SeqAIJ *)A->data)->a;
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 
383 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
384 {
385   PetscFunctionBegin;
386   *array = NULL;
387   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
391 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
392 {
393   PetscFunctionBegin;
394   *array = ((Mat_SeqAIJ *)A->data)->a;
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
399 {
400   PetscFunctionBegin;
401   A->offloadmask = PETSC_OFFLOAD_CPU;
402   *array         = NULL;
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
407 {
408   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
409 
410   PetscFunctionBegin;
411   A->boundtocpu = flg;
412   if (flg && a->inode.size_csr) {
413     a->inode.use = PETSC_TRUE;
414   } else {
415     a->inode.use = PETSC_FALSE;
416   }
417   if (flg) {
418     /* make sure we have an up-to-date copy on the CPU */
419     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
420     A->ops->mult        = MatMult_SeqAIJ;
421     A->ops->multadd     = MatMultAdd_SeqAIJ;
422     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
423     A->ops->duplicate   = MatDuplicate_SeqAIJ;
424     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
425   } else {
426     A->ops->mult        = MatMult_SeqAIJViennaCL;
427     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
428     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
429     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
430     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
431 
432     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
433     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
434     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
435     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
436     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
437     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
438   }
439   PetscFunctionReturn(PETSC_SUCCESS);
440 }
441 
442 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
443 {
444   Mat B;
445 
446   PetscFunctionBegin;
447   PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
448 
449   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));
450 
451   B = *newmat;
452 
453   B->spptr = new Mat_SeqAIJViennaCL();
454 
455   ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
456   ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
457   ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;
458 
459   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
460   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
461 
462   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
463   PetscCall(PetscFree(B->defaultvectype));
464   PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));
465 
466   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
467   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
468   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));
469 
470   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
471 
472   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
473   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /*MC
478    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
479 
480    A matrix type whose data resides on GPUs. These matrices are in CSR format by
481    default. All matrix calculations are performed using the ViennaCL library.
482 
483    Options Database Keys:
484 +  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()
485 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
486 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
487 
488   Level: beginner
489 
490 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
491 M*/
492 
493 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
494 {
495   PetscFunctionBegin;
496   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
497   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
498   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
499   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
500   PetscFunctionReturn(PETSC_SUCCESS);
501 }
502