xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2     Defines the basic matrix operations for the AIJ (compressed row)
3   matrix storage format.
4 */
5 
6 #include <petscconf.h>
7 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
8 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
9 #include <petscbt.h>
10 #include <../src/vec/vec/impls/dvecimpl.h>
11 #include <petsc/private/vecimpl.h>
12 
13 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
14 
15 #include <algorithm>
16 #include <vector>
17 #include <string>
18 
19 #include "viennacl/linalg/prod.hpp"
20 
21 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
22 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
23 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
24 
/*
  MatViennaCLCopyToGPU - Pushes the CSR data of a SeqAIJ matrix to the GPU via ViennaCL.

  Does nothing when the device copy is already current (offloadmask is GPU or BOTH),
  or when the matrix has no rows, no columns, or no stored nonzeros.  On success the
  offload mask is set to PETSC_OFFLOAD_BOTH.  Also (re)allocates the scratch device
  vector viennaclstruct->tempvec used by MatMultAdd_SeqAIJViennaCL().
*/
PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));

      try {
        if (a->compressedrow.use) {
          /* Compressed-row storage: only rows containing nonzeros are stored, plus their global row indices */
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          /* row-pointer array (length nrows+1) converted element by element */
          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
          for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);

          /* global indices of the stored (nonzero) rows */
          viennacl::backend::typesafe_host_array<unsigned int> row_indices;
          row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);

          /* column indices of all stored nonzeros */
          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        } else {
          /* Plain CSR storage */
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          /* row-pointer array (length n+1) */
          viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
          row_buffer.raw_resize(dummy, A->rmap->n + 1);
          for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);

          /* column indices of all stored nonzeros */
          viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
          col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
        }
        ViennaCLWaitForGPU();
      } catch (std::exception const &ex) {
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          /* row count changed: the old scratch vector is the wrong size, replace it */
          delete (ViennaCLVector *)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
97 
/*
  MatViennaCLCopyFromGPU - Pulls matrix data from the GPU back into the host SeqAIJ storage.

  Three cases:
  1. offloadmask == PETSC_OFFLOAD_BOTH: host copy already current, nothing to do.
  2. offloadmask == PETSC_OFFLOAD_UNALLOCATED and Agpu != NULL: the host side has no
     storage yet; allocate fresh CSR arrays (a, j, i, imax, ilen) sized from Agpu and
     copy row pointers, column indices, and values down from the device.
  3. Otherwise: the host sparsity pattern already exists, so only the numerical values
     are copied (from Agpu if given, else from the matrix's own device copy).

  Ends with a final assembly so the offload mask stays PETSC_OFFLOAD_BOTH.
*/
PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
  PetscInt            m              = A->rmap->n;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(PETSC_SUCCESS);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
      a->nz           = Agpu->nnz();
      a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
      A->preallocated = PETSC_TRUE;
      /* free any previous host CSR storage, honoring how it was allocated */
      if (a->singlemalloc) {
        if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i));
      } else {
        if (a->i) PetscCall(PetscFree(a->i));
        if (a->j) PetscCall(PetscFree(a->j));
        if (a->a) PetscCall(PetscFree(a->a));
      }
      PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i));

      a->singlemalloc = PETSC_TRUE;

      /* Setup row lengths */
      PetscCall(PetscFree(a->imax));
      PetscCall(PetscFree(a->ilen));
      PetscCall(PetscMalloc1(m, &a->imax));
      PetscCall(PetscMalloc1(m, &a->ilen));

      /* Copy data back from GPU */
      viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
      row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

      // copy row array
      viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
      (a->i)[0] = row_buffer[0];
      for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
        (a->i)[i + 1] = row_buffer[i + 1];
        a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
      }

      // copy column indices
      viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
      col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
      viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
      for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];

      // copy nonzero entries directly to destination (no conversion required)
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);

      PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
      ViennaCLWaitForGPU();
      /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    /* nothing on either side yet and no source matrix was supplied */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    /* pattern exists on the host; only refresh values, and only if the device copy is newer */
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(PETSC_SUCCESS);

    PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
175 
/* Computes y = A*x on the GPU via ViennaCL; sets y = 0 when the matrix has no rows,
   no columns, or no stored nonzeros. */
static PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu           = NULL;
  ViennaCLVector       *ygpu           = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
    PetscCall(PetscLogGpuTimeBegin());
    try {
      /* pick whichever device representation was populated by MatViennaCLCopyToGPU() */
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuTimeEnd());
    PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
    PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
    /* 2 flops per nonzero, minus one add per nonempty row */
    PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
  } else {
    PetscCall(VecSet_SeqViennaCL(yy, 0));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
209 
/* Computes z = y + A*x on the GPU via ViennaCL, using viennaclstruct->tempvec
   (allocated in MatViennaCLCopyToGPU()) for the intermediate product A*x.
   Falls back to z = y when the matrix has no rows, columns, or nonzeros. */
static PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
  const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
  ViennaCLVector       *zgpu = NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  PetscCall(MatViennaCLCopyToGPU(A));
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      /* NOTE(review): when zz == yy the write handle for zz aliases yy; correctness of
         the += branch below appears to rely on VecViennaCLGetArrayRead(yy) having made
         the device copy current before the write handle is taken -- confirm against the
         VecViennaCL implementation. */
      PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
      PetscCall(PetscLogGpuTimeBegin());
      /* tempvec = A*x */
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      /* z = y + tempvec, accumulating in place when z aliases y */
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      PetscCall(PetscLogGpuTimeEnd());
      PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
      PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
      PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));

    } catch (std::exception const &ex) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
    PetscCall(PetscLogGpuFlops(2.0 * a->nz));
  } else {
    PetscCall(VecCopy_SeqViennaCL(yy, zz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
245 
246 static PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode)
247 {
248   PetscFunctionBegin;
249   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
250   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
251   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 /* --------------------------------------------------------------------------------*/
256 /*@C
257   MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
  (the default sequential PETSc format).  This matrix will ultimately be pushed down
259   to GPUs and use the ViennaCL library for calculations.
260 
261   Collective
262 
263   Input Parameters:
264 + comm - MPI communicator, set to `PETSC_COMM_SELF`
265 . m    - number of rows
266 . n    - number of columns
267 . nz   - number of nonzeros per row (same for all rows), ignored if `nnz` is set
268 - nnz  - array containing the number of nonzeros in the various rows (possibly different for each row) or `NULL`
269 
270   Output Parameter:
271 . A - the matrix
272 
273    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
274    MatXXXXSetPreallocation() paradigm instead of this routine directly.
275    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
276 
277   Notes:
278   The AIJ format, also called
279   compressed row storage, is fully compatible with standard Fortran
280   storage.  That is, the stored row and column indices can begin at
281   either one (as in Fortran) or zero.
282 
283   Specify the preallocated storage with either `nz` or `nnz` (not both).
284   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
285   allocation.
286 
287   Level: intermediate
288 
289 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`
290 @*/
291 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
292 {
293   PetscFunctionBegin;
294   PetscCall(MatCreate(comm, A));
295   PetscCall(MatSetSizes(*A, m, n, m, n));
296   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
297   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
302 {
303   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
304 
305   PetscFunctionBegin;
306   try {
307     if (viennaclcontainer) {
308       delete viennaclcontainer->tempvec;
309       delete viennaclcontainer->mat;
310       delete viennaclcontainer->compressed_mat;
311       delete viennaclcontainer;
312     }
313     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
314   } catch (std::exception const &ex) {
315     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
316   }
317 
318   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
321 
322   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
323   A->spptr = 0;
324   PetscCall(MatDestroy_SeqAIJ(A));
325   PetscFunctionReturn(PETSC_SUCCESS);
326 }
327 
/* Constructor for MATSEQAIJVIENNACL: build a plain SeqAIJ matrix, then convert it
   in place to the ViennaCL type (installs the GPU ops and the spptr container). */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscFunctionBegin;
  PetscCall(MatCreate_SeqAIJ(B));
  PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
335 
336 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
/* Duplicate a MATSEQAIJVIENNACL matrix: copy the host data through the SeqAIJ
   duplicate, attach a fresh (empty) ViennaCL container to the copy, and push the
   copy to the GPU if the source was already assembled. */
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B)
{
  Mat C;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
  C = *B;

  /* NOTE(review): this binds the SOURCE matrix A, while only C's bindtocpu hook is
     installed below; it looks like the call may have been intended to act on C --
     confirm against upstream history before changing. */
  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  /* fresh, empty device-side container; populated on the first MatViennaCLCopyToGPU() */
  C->spptr                                         = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
361 
362 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
363 {
364   PetscFunctionBegin;
365   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
366   *array = ((Mat_SeqAIJ *)A->data)->a;
367   PetscFunctionReturn(PETSC_SUCCESS);
368 }
369 
370 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[])
371 {
372   PetscFunctionBegin;
373   A->offloadmask = PETSC_OFFLOAD_CPU;
374   *array         = NULL;
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
379 {
380   PetscFunctionBegin;
381   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
382   *array = ((Mat_SeqAIJ *)A->data)->a;
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 
386 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[])
387 {
388   PetscFunctionBegin;
389   *array = NULL;
390   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
391   PetscFunctionReturn(PETSC_SUCCESS);
392 }
393 
394 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
395 {
396   PetscFunctionBegin;
397   *array = ((Mat_SeqAIJ *)A->data)->a;
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400 
401 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[])
402 {
403   PetscFunctionBegin;
404   A->offloadmask = PETSC_OFFLOAD_CPU;
405   *array         = NULL;
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
409 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg)
410 {
411   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
412 
413   PetscFunctionBegin;
414   A->boundtocpu = flg;
415   if (flg && a->inode.size) {
416     a->inode.use = PETSC_TRUE;
417   } else {
418     a->inode.use = PETSC_FALSE;
419   }
420   if (flg) {
421     /* make sure we have an up-to-date copy on the CPU */
422     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
423     A->ops->mult        = MatMult_SeqAIJ;
424     A->ops->multadd     = MatMultAdd_SeqAIJ;
425     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
426     A->ops->duplicate   = MatDuplicate_SeqAIJ;
427     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
428   } else {
429     A->ops->mult        = MatMult_SeqAIJViennaCL;
430     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
431     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
432     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
433     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
434 
435     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
436     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
437     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
438     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
439     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
440     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
441   }
442   PetscFunctionReturn(PETSC_SUCCESS);
443 }
444 
/*
  Convert a SeqAIJ matrix to MATSEQAIJVIENNACL.

  Only MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX are supported (MAT_REUSE_MATRIX is
  rejected).  For MAT_INPLACE_MATRIX, A and *newmat refer to the same object.
  Attaches an empty ViennaCL container, installs the GPU function table via
  MatBindToCPU, and switches the default vector type to VECVIENNACL.
*/
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat B;

  PetscFunctionBegin;
  PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));

  B = *newmat;

  /* empty device-side container; populated on the first MatViennaCLCopyToGPU() */
  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;

  /* NOTE(review): the bind call and the compositions below act on A, not B.  For
     MAT_INPLACE_MATRIX the two coincide, but for MAT_INITIAL_MATRIX this modifies
     the source matrix -- confirm whether B was intended. */
  PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
479 
480 /*MC
481    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
482 
483    A matrix type whose data resides on GPUs. These matrices are in CSR format by
484    default. All matrix calculations are performed using the ViennaCL library.
485 
486    Options Database Keys:
+  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()`
488 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
489 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
490 
491   Level: beginner
492 
493 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
494 M*/
495 
496 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
497 {
498   PetscFunctionBegin;
499   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
500   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
501   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
502   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
503   PetscFunctionReturn(PETSC_SUCCESS);
504 }
505