xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 8e010fa18d00c25e58c23165efbc8aef665b8a2c)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
26 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
27 {
28   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
29   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
30 
31   PetscFunctionBegin;
32   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
33     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
34       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
35 
36       try {
37         if (a->compressedrow.use) {
38           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
39 
40           // Since PetscInt is different from cl_uint, we have to convert:
41           viennacl::backend::mem_handle dummy;
42 
43           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
44           row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
45           for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);
46 
47           viennacl::backend::typesafe_host_array<unsigned int> row_indices;
48           row_indices.raw_resize(dummy, a->compressedrow.nrows);
49           for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);
50 
51           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
52           col_buffer.raw_resize(dummy, a->nz);
53           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
54 
55           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
56           PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
57         } else {
58           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
59 
60           // Since PetscInt is in general different from cl_uint, we have to convert:
61           viennacl::backend::mem_handle dummy;
62 
63           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
64           row_buffer.raw_resize(dummy, A->rmap->n + 1);
65           for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);
66 
67           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
68           col_buffer.raw_resize(dummy, a->nz);
69           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
70 
71           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
72           PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
73         }
74         ViennaCLWaitForGPU();
75       } catch (std::exception const &ex) {
76         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
77       }
78 
79       // Create temporary vector for v += A*x:
80       if (viennaclstruct->tempvec) {
81         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
82           delete (ViennaCLVector *)viennaclstruct->tempvec;
83           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
84         } else {
85           viennaclstruct->tempvec->clear();
86         }
87       } else {
88         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
89       }
90 
91       A->offloadmask = PETSC_OFFLOAD_BOTH;
92 
93       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
94     }
95   }
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
100 {
101   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
102   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
103   PetscInt            m              = A->rmap->n;
104 
105   PetscFunctionBegin;
106   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
107   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
108     try {
109       PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
110       PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
111       a->nz           = Agpu->nnz();
112       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
113       A->preallocated = PETSC_TRUE;
114       if (a->singlemalloc) {
115         if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i));
116       } else {
117         if (a->i) PetscCall(PetscFree(a->i));
118         if (a->j) PetscCall(PetscFree(a->j));
119         if (a->a) PetscCall(PetscFree(a->a));
120       }
121       PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i));
122 
123       a->singlemalloc = PETSC_TRUE;
124 
125       /* Setup row lengths */
126       PetscCall(PetscFree(a->imax));
127       PetscCall(PetscFree(a->ilen));
128       PetscCall(PetscMalloc1(m, &a->imax));
129       PetscCall(PetscMalloc1(m, &a->ilen));
130 
131       /* Copy data back from GPU */
132       viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
133       row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
134 
135       // copy row array
136       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
137       (a->i)[0] = row_buffer[0];
138       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
139         (a->i)[i + 1] = row_buffer[i + 1];
140         a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
141       }
142 
143       // copy column indices
144       viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
145       col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
146       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
147       for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];
148 
149       // copy nonzero entries directly to destination (no conversion required)
150       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
151 
152       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
153       ViennaCLWaitForGPU();
154       /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
155     } catch (std::exception const &ex) {
156       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
157     }
158   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
159     PetscFunctionReturn(PETSC_SUCCESS);
160   } else {
161     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);
162 
163     PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
164     if (!Agpu) {
165       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
166     } else {
167       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
168     }
169   }
170   A->offloadmask = PETSC_OFFLOAD_BOTH;
171   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
172   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
178 {
179   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
180   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
181   const ViennaCLVector *xgpu           = NULL;
182   ViennaCLVector       *ygpu           = NULL;
183 
184   PetscFunctionBegin;
185   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
186   PetscCall(MatViennaCLCopyToGPU(A));
187   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
188     PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
189     PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
190     PetscCall(PetscLogGpuTimeBegin());
191     try {
192       if (a->compressedrow.use) {
193         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
194       } else {
195         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
196       }
197       ViennaCLWaitForGPU();
198     } catch (std::exception const &ex) {
199       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200     }
201     PetscCall(PetscLogGpuTimeEnd());
202     PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
203     PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
204     PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
205   } else {
206     PetscCall(VecSet_SeqViennaCL(yy, 0));
207   }
208   PetscFunctionReturn(PETSC_SUCCESS);
209 }
210 
211 static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
212 {
213   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
214   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
215   const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
216   ViennaCLVector       *zgpu = NULL;
217 
218   PetscFunctionBegin;
219   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
220   PetscCall(MatViennaCLCopyToGPU(A));
221   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
222     try {
223       PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
224       PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
225       PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
226       PetscCall(PetscLogGpuTimeBegin());
227       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
228       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
229       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
230       else *zgpu += *viennaclstruct->tempvec;
231       ViennaCLWaitForGPU();
232       PetscCall(PetscLogGpuTimeEnd());
233       PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
234       PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
235       PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
236 
237     } catch (std::exception const &ex) {
238       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
239     }
240     PetscCall(PetscLogGpuFlops(2.0 * a->nz));
241   } else {
242     PetscCall(VecCopy_SeqViennaCL(yy, zz));
243   }
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
248 {
249   PetscFunctionBegin;
250   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
251   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
252   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 /* --------------------------------------------------------------------------------*/
257 /*@C
258   MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
259   (the default parallel PETSc format).  This matrix will ultimately be pushed down
260   to GPUs and use the ViennaCL library for calculations.
261 
262   Collective
263 
264   Input Parameters:
265 + comm - MPI communicator, set to `PETSC_COMM_SELF`
266 . m    - number of rows
267 . n    - number of columns
268 . nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
269 - nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`
270 
271   Output Parameter:
272 . A - the matrix
273 
274    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
275    MatXXXXSetPreallocation() paradigm instead of this routine directly.
276    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
277 
278   Notes:
279   The AIJ format, also called
280   compressed row storage, is fully compatible with standard Fortran
281   storage.  That is, the stored row and column indices can begin at
282   either one (as in Fortran) or zero.
283 
284   Specify the preallocated storage with either `nz` or `nnz` (not both).
285   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
286   allocation.
287 
288   Level: intermediate
289 
290 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
291 @*/
292 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
293 {
294   PetscFunctionBegin;
295   PetscCall(MatCreate(comm, A));
296   PetscCall(MatSetSizes(*A, m, n, m, n));
297   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
298   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
299   PetscFunctionReturn(PETSC_SUCCESS);
300 }
301 
302 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
303 {
304   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
305 
306   PetscFunctionBegin;
307   try {
308     if (viennaclcontainer) {
309       delete viennaclcontainer->tempvec;
310       delete viennaclcontainer->mat;
311       delete viennaclcontainer->compressed_mat;
312       delete viennaclcontainer;
313     }
314     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
315   } catch (std::exception const &ex) {
316     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
317   }
318 
319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
321   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
322 
323   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
324   A->spptr = 0;
325   PetscCall(MatDestroy_SeqAIJ(A));
326   PetscFunctionReturn(PETSC_SUCCESS);
327 }
328 
329 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
330 {
331   PetscFunctionBegin;
332   PetscCall(MatCreate_SeqAIJ(B));
333   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
334   PetscFunctionReturn(PETSC_SUCCESS);
335 }
336 
337 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
338 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
339 {
340   Mat C;
341 
342   PetscFunctionBegin;
343   PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
344   C = *B;
345 
346   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
347   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
348 
349   C->spptr                                         = new Mat_SeqAIJViennaCL();
350   ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
351   ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
352   ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;
353 
354   PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));
355 
356   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
357 
358   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
359   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
360   PetscFunctionReturn(PETSC_SUCCESS);
361 }
362 
363 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
364 {
365   PetscFunctionBegin;
366   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
367   *array = ((Mat_SeqAIJ *)A->data)->a;
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
372 {
373   PetscFunctionBegin;
374   A->offloadmask = PETSC_OFFLOAD_CPU;
375   *array         = NULL;
376   PetscFunctionReturn(PETSC_SUCCESS);
377 }
378 
379 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
380 {
381   PetscFunctionBegin;
382   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
383   *array = ((Mat_SeqAIJ *)A->data)->a;
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
388 {
389   PetscFunctionBegin;
390   *array = NULL;
391   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
392   PetscFunctionReturn(PETSC_SUCCESS);
393 }
394 
395 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
396 {
397   PetscFunctionBegin;
398   *array = ((Mat_SeqAIJ *)A->data)->a;
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
402 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
403 {
404   PetscFunctionBegin;
405   A->offloadmask = PETSC_OFFLOAD_CPU;
406   *array         = NULL;
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
410 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
411 {
412   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
413 
414   PetscFunctionBegin;
415   A->boundtocpu = flg;
416   if (flg && a->inode.size) {
417     a->inode.use = PETSC_TRUE;
418   } else {
419     a->inode.use = PETSC_FALSE;
420   }
421   if (flg) {
422     /* make sure we have an up-to-date copy on the CPU */
423     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
424     A->ops->mult        = MatMult_SeqAIJ;
425     A->ops->multadd     = MatMultAdd_SeqAIJ;
426     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
427     A->ops->duplicate   = MatDuplicate_SeqAIJ;
428     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
429   } else {
430     A->ops->mult        = MatMult_SeqAIJViennaCL;
431     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
432     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
433     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
434     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
435 
436     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
437     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
438     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
439     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
440     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
441     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
442   }
443   PetscFunctionReturn(PETSC_SUCCESS);
444 }
445 
446 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
447 {
448   Mat B;
449 
450   PetscFunctionBegin;
451   PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
452 
453   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));
454 
455   B = *newmat;
456 
457   B->spptr = new Mat_SeqAIJViennaCL();
458 
459   ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
460   ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
461   ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;
462 
463   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
464   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
465 
466   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
467   PetscCall(PetscFree(B->defaultvectype));
468   PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));
469 
470   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
471   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
472   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));
473 
474   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
475 
476   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
477   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 
481 /*MC
482    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
483 
484    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
485    default. All matrix calculations are performed using the ViennaCL library.
486 
487    Options Database Keys:
488 +  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()
489 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
490 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
491 
492   Level: beginner
493 
494 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
495 M*/
496 
497 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
498 {
499   PetscFunctionBegin;
500   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
501   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
502   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
503   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506