xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision e0f5bfbec699682fa3e8b8532b1176849ea4e12a)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) {
27   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
28   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
29 
30   PetscFunctionBegin;
31   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
32     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
33       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
34 
35       try {
36         if (a->compressedrow.use) {
37           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
38 
39           // Since PetscInt is different from cl_uint, we have to convert:
40           viennacl::backend::mem_handle dummy;
41 
42           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
43           row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
44           for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_indices;
47           row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
51           col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
53 
54           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
55           PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
56         } else {
57           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
58 
59           // Since PetscInt is in general different from cl_uint, we have to convert:
60           viennacl::backend::mem_handle dummy;
61 
62           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
63           row_buffer.raw_resize(dummy, A->rmap->n + 1);
64           for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);
65 
66           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
67           col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
69 
70           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
71           PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
72         }
73         ViennaCLWaitForGPU();
74       } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
75 
76       // Create temporary vector for v += A*x:
77       if (viennaclstruct->tempvec) {
78         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
79           delete (ViennaCLVector *)viennaclstruct->tempvec;
80           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
81         } else {
82           viennaclstruct->tempvec->clear();
83         }
84       } else {
85         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
86       }
87 
88       A->offloadmask = PETSC_OFFLOAD_BOTH;
89 
90       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
91     }
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) {
97   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
98   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
99   PetscInt            m              = A->rmap->n;
100 
101   PetscFunctionBegin;
102   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
103   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
104     try {
105       PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
106       PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
107       a->nz           = Agpu->nnz();
108       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
109       A->preallocated = PETSC_TRUE;
110       if (a->singlemalloc) {
111         if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i));
112       } else {
113         if (a->i) PetscCall(PetscFree(a->i));
114         if (a->j) PetscCall(PetscFree(a->j));
115         if (a->a) PetscCall(PetscFree(a->a));
116       }
117       PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i));
118 
119       a->singlemalloc = PETSC_TRUE;
120 
121       /* Setup row lengths */
122       PetscCall(PetscFree(a->imax));
123       PetscCall(PetscFree(a->ilen));
124       PetscCall(PetscMalloc1(m, &a->imax));
125       PetscCall(PetscMalloc1(m, &a->ilen));
126 
127       /* Copy data back from GPU */
128       viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
129       row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
130 
131       // copy row array
132       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
133       (a->i)[0] = row_buffer[0];
134       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
135         (a->i)[i + 1] = row_buffer[i + 1];
136         a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
137       }
138 
139       // copy column indices
140       viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
141       col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
142       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
143       for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];
144 
145       // copy nonzero entries directly to destination (no conversion required)
146       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
147 
148       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
149       ViennaCLWaitForGPU();
150       /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
151     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
152   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
153     PetscFunctionReturn(0);
154   } else {
155     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
156 
157     PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
158     if (!Agpu) {
159       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
160     } else {
161       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
162     }
163   }
164   A->offloadmask = PETSC_OFFLOAD_BOTH;
165   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
166   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
167   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) {
172   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
173   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
174   const ViennaCLVector *xgpu           = NULL;
175   ViennaCLVector       *ygpu           = NULL;
176 
177   PetscFunctionBegin;
178   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
179   PetscCall(MatViennaCLCopyToGPU(A));
180   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
181     PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
182     PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
183     PetscCall(PetscLogGpuTimeBegin());
184     try {
185       if (a->compressedrow.use) {
186         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
187       } else {
188         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
189       }
190       ViennaCLWaitForGPU();
191     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
192     PetscCall(PetscLogGpuTimeEnd());
193     PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
194     PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
195     PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
196   } else {
197     PetscCall(VecSet_SeqViennaCL(yy, 0));
198   }
199   PetscFunctionReturn(0);
200 }
201 
202 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) {
203   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
204   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
205   const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
206   ViennaCLVector       *zgpu = NULL;
207 
208   PetscFunctionBegin;
209   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
210   PetscCall(MatViennaCLCopyToGPU(A));
211   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
212     try {
213       PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
214       PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
215       PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
216       PetscCall(PetscLogGpuTimeBegin());
217       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
218       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
219       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
220       else *zgpu += *viennaclstruct->tempvec;
221       ViennaCLWaitForGPU();
222       PetscCall(PetscLogGpuTimeEnd());
223       PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
224       PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
225       PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
226 
227     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
228     PetscCall(PetscLogGpuFlops(2.0 * a->nz));
229   } else {
230     PetscCall(VecCopy_SeqViennaCL(yy, zz));
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) {
236   PetscFunctionBegin;
237   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
238   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
239   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
240   PetscFunctionReturn(0);
241 }
242 
243 /* --------------------------------------------------------------------------------*/
244 /*@C
245    MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
246    (the default parallel PETSc format).  This matrix will ultimately be pushed down
247    to GPUs and use the ViennaCL library for calculations. For good matrix
248    assembly performance the user should preallocate the matrix storage by setting
249    the parameter nz (or the array nnz).  By setting these parameters accurately,
250    performance during matrix assembly can be increased substantially.
251 
252    Collective
253 
254    Input Parameters:
255 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
256 .  m - number of rows
257 .  n - number of columns
258 .  nz - number of nonzeros per row (same for all rows)
259 -  nnz - array containing the number of nonzeros in the various rows
260          (possibly different for each row) or NULL
261 
262    Output Parameter:
263 .  A - the matrix
264 
265    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
266    MatXXXXSetPreallocation() paradigm instead of this routine directly.
267    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
268 
269    Notes:
270    If nnz is given then nz is ignored
271 
272    The AIJ format, also called
273    compressed row storage, is fully compatible with standard Fortran 77
274    storage.  That is, the stored row and column indices can begin at
275    either one (as in Fortran) or zero.  See the users' manual for details.
276 
277    Specify the preallocated storage with either nz or nnz (not both).
278    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
279    allocation.  For large problems you MUST preallocate memory or you
280    will get TERRIBLE performance, see the users' manual chapter on matrices.
281 
282    Level: intermediate
283 
284 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()`
285 @*/
286 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
287   PetscFunctionBegin;
288   PetscCall(MatCreate(comm, A));
289   PetscCall(MatSetSizes(*A, m, n, m, n));
290   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
291   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) {
296   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
297 
298   PetscFunctionBegin;
299   try {
300     if (viennaclcontainer) {
301       delete viennaclcontainer->tempvec;
302       delete viennaclcontainer->mat;
303       delete viennaclcontainer->compressed_mat;
304       delete viennaclcontainer;
305     }
306     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
307   } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
308 
309   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
310   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
311   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
312 
313   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
314   A->spptr = 0;
315   PetscCall(MatDestroy_SeqAIJ(A));
316   PetscFunctionReturn(0);
317 }
318 
319 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) {
320   PetscFunctionBegin;
321   PetscCall(MatCreate_SeqAIJ(B));
322   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
323   PetscFunctionReturn(0);
324 }
325 
326 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
327 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) {
328   Mat C;
329 
330   PetscFunctionBegin;
331   PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
332   C = *B;
333 
334   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
335   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
336 
337   C->spptr                                         = new Mat_SeqAIJViennaCL();
338   ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
339   ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
340   ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;
341 
342   PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));
343 
344   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
345 
346   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
347   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
348   PetscFunctionReturn(0);
349 }
350 
351 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
352   PetscFunctionBegin;
353   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
354   *array = ((Mat_SeqAIJ *)A->data)->a;
355   PetscFunctionReturn(0);
356 }
357 
358 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
359   PetscFunctionBegin;
360   A->offloadmask = PETSC_OFFLOAD_CPU;
361   *array         = NULL;
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) {
366   PetscFunctionBegin;
367   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
368   *array = ((Mat_SeqAIJ *)A->data)->a;
369   PetscFunctionReturn(0);
370 }
371 
372 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) {
373   PetscFunctionBegin;
374   *array = NULL;
375   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
376   PetscFunctionReturn(0);
377 }
378 
379 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
380   PetscFunctionBegin;
381   *array = ((Mat_SeqAIJ *)A->data)->a;
382   PetscFunctionReturn(0);
383 }
384 
385 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
386   PetscFunctionBegin;
387   A->offloadmask = PETSC_OFFLOAD_CPU;
388   *array         = NULL;
389   PetscFunctionReturn(0);
390 }
391 
392 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) {
393   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
394 
395   PetscFunctionBegin;
396   A->boundtocpu = flg;
397   if (flg && a->inode.size) {
398     a->inode.use = PETSC_TRUE;
399   } else {
400     a->inode.use = PETSC_FALSE;
401   }
402   if (flg) {
403     /* make sure we have an up-to-date copy on the CPU */
404     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
405     A->ops->mult        = MatMult_SeqAIJ;
406     A->ops->multadd     = MatMultAdd_SeqAIJ;
407     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
408     A->ops->duplicate   = MatDuplicate_SeqAIJ;
409     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
410   } else {
411     A->ops->mult        = MatMult_SeqAIJViennaCL;
412     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
413     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
414     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
415     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
416 
417     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
418     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
419     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
420     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
421     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
422     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
428   Mat B;
429 
430   PetscFunctionBegin;
431   PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
432 
433   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));
434 
435   B = *newmat;
436 
437   B->spptr = new Mat_SeqAIJViennaCL();
438 
439   ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
440   ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
441   ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;
442 
443   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
444   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
445 
446   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
447   PetscCall(PetscFree(B->defaultvectype));
448   PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));
449 
450   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
451   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
452   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));
453 
454   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
455 
456   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
457   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
458   PetscFunctionReturn(0);
459 }
460 
461 /*MC
462    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
463 
464    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
465    default. All matrix calculations are performed using the ViennaCL library.
466 
467    Options Database Keys:
468 +  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()
469 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
470 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
471 
472   Level: beginner
473 
474 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
475 M*/
476 
477 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) {
478   PetscFunctionBegin;
479   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
480   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
481   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
482   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
483   PetscFunctionReturn(0);
484 }
485