xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat, MatFactorType, Mat *);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
26 PetscErrorCode MatViennaCLCopyToGPU(Mat A) {
27   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
28   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
29 
30   PetscFunctionBegin;
31   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
32     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
33       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
34 
35       try {
36         if (a->compressedrow.use) {
37           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
38 
39           // Since PetscInt is different from cl_uint, we have to convert:
40           viennacl::backend::mem_handle dummy;
41 
42           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
43           row_buffer.raw_resize(dummy, a->compressedrow.nrows + 1);
44           for (PetscInt i = 0; i <= a->compressedrow.nrows; ++i) row_buffer.set(i, (a->compressedrow.i)[i]);
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_indices;
47           row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i = 0; i < a->compressedrow.nrows; ++i) row_indices.set(i, (a->compressedrow.rindex)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
51           col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
53 
54           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
55           PetscCall(PetscLogCpuToGpu(((2 * a->compressedrow.nrows) + 1 + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
56         } else {
57           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
58 
59           // Since PetscInt is in general different from cl_uint, we have to convert:
60           viennacl::backend::mem_handle dummy;
61 
62           viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
63           row_buffer.raw_resize(dummy, A->rmap->n + 1);
64           for (PetscInt i = 0; i <= A->rmap->n; ++i) row_buffer.set(i, (a->i)[i]);
65 
66           viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
67           col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i = 0; i < a->nz; ++i) col_buffer.set(i, (a->j)[i]);
69 
70           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
71           PetscCall(PetscLogCpuToGpu(((A->rmap->n + 1) + a->nz) * sizeof(PetscInt) + (a->nz) * sizeof(PetscScalar)));
72         }
73         ViennaCLWaitForGPU();
74       } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
75 
76       // Create temporary vector for v += A*x:
77       if (viennaclstruct->tempvec) {
78         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
79           delete (ViennaCLVector *)viennaclstruct->tempvec;
80           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
81         } else {
82           viennaclstruct->tempvec->clear();
83         }
84       } else {
85         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
86       }
87 
88       A->offloadmask = PETSC_OFFLOAD_BOTH;
89 
90       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU, A, 0, 0, 0));
91     }
92   }
93   PetscFunctionReturn(0);
94 }
95 
96 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) {
97   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
98   Mat_SeqAIJ         *a              = (Mat_SeqAIJ *)A->data;
99   PetscInt            m              = A->rmap->n;
100 
101   PetscFunctionBegin;
102   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
103   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
104     try {
105       PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
106       PetscCheck((PetscInt)Agpu->size1() == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
107       a->nz           = Agpu->nnz();
108       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
109       A->preallocated = PETSC_TRUE;
110       if (a->singlemalloc) {
111         if (a->a) PetscCall(PetscFree3(a->a, a->j, a->i));
112       } else {
113         if (a->i) PetscCall(PetscFree(a->i));
114         if (a->j) PetscCall(PetscFree(a->j));
115         if (a->a) PetscCall(PetscFree(a->a));
116       }
117       PetscCall(PetscMalloc3(a->nz, &a->a, a->nz, &a->j, m + 1, &a->i));
118       PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + (m + 1) * sizeof(PetscInt)));
119 
120       a->singlemalloc = PETSC_TRUE;
121 
122       /* Setup row lengths */
123       PetscCall(PetscFree(a->imax));
124       PetscCall(PetscFree(a->ilen));
125       PetscCall(PetscMalloc1(m, &a->imax));
126       PetscCall(PetscMalloc1(m, &a->ilen));
127       PetscCall(PetscLogObjectMemory((PetscObject)A, 2 * m * sizeof(PetscInt)));
128 
129       /* Copy data back from GPU */
130       viennacl::backend::typesafe_host_array<unsigned int> row_buffer;
131       row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
132 
133       // copy row array
134       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
135       (a->i)[0] = row_buffer[0];
136       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
137         (a->i)[i + 1] = row_buffer[i + 1];
138         a->imax[i] = a->ilen[i] = a->i[i + 1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
139       }
140 
141       // copy column indices
142       viennacl::backend::typesafe_host_array<unsigned int> col_buffer;
143       col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
144       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
145       for (PetscInt i = 0; i < (PetscInt)Agpu->nnz(); ++i) (a->j)[i] = col_buffer[i];
146 
147       // copy nonzero entries directly to destination (no conversion required)
148       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
149 
150       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size() + col_buffer.raw_size() + (Agpu->nnz() * sizeof(PetscScalar))));
151       ViennaCLWaitForGPU();
152       /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
153     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
154   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
155     PetscFunctionReturn(0);
156   } else {
157     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
158 
159     PetscCheck(!a->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
160     if (!Agpu) {
161       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar) * viennaclstruct->mat->nnz(), a->a);
162     } else {
163       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar) * Agpu->nnz(), a->a);
164     }
165   }
166   A->offloadmask = PETSC_OFFLOAD_BOTH;
167   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
168   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
169   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
170   PetscFunctionReturn(0);
171 }
172 
173 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A, Vec xx, Vec yy) {
174   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
175   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
176   const ViennaCLVector *xgpu           = NULL;
177   ViennaCLVector       *ygpu           = NULL;
178 
179   PetscFunctionBegin;
180   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
181   PetscCall(MatViennaCLCopyToGPU(A));
182   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
183     PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
184     PetscCall(VecViennaCLGetArrayWrite(yy, &ygpu));
185     PetscCall(PetscLogGpuTimeBegin());
186     try {
187       if (a->compressedrow.use) {
188         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
189       } else {
190         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
191       }
192       ViennaCLWaitForGPU();
193     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
194     PetscCall(PetscLogGpuTimeEnd());
195     PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
196     PetscCall(VecViennaCLRestoreArrayWrite(yy, &ygpu));
197     PetscCall(PetscLogGpuFlops(2.0 * a->nz - a->nonzerorowcnt));
198   } else {
199     PetscCall(VecSet_SeqViennaCL(yy, 0));
200   }
201   PetscFunctionReturn(0);
202 }
203 
204 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A, Vec xx, Vec yy, Vec zz) {
205   Mat_SeqAIJ           *a              = (Mat_SeqAIJ *)A->data;
206   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL *)A->spptr;
207   const ViennaCLVector *xgpu = NULL, *ygpu = NULL;
208   ViennaCLVector       *zgpu = NULL;
209 
210   PetscFunctionBegin;
211   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
212   PetscCall(MatViennaCLCopyToGPU(A));
213   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
214     try {
215       PetscCall(VecViennaCLGetArrayRead(xx, &xgpu));
216       PetscCall(VecViennaCLGetArrayRead(yy, &ygpu));
217       PetscCall(VecViennaCLGetArrayWrite(zz, &zgpu));
218       PetscCall(PetscLogGpuTimeBegin());
219       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
220       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
221       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
222       else *zgpu += *viennaclstruct->tempvec;
223       ViennaCLWaitForGPU();
224       PetscCall(PetscLogGpuTimeEnd());
225       PetscCall(VecViennaCLRestoreArrayRead(xx, &xgpu));
226       PetscCall(VecViennaCLRestoreArrayRead(yy, &ygpu));
227       PetscCall(VecViennaCLRestoreArrayWrite(zz, &zgpu));
228 
229     } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
230     PetscCall(PetscLogGpuFlops(2.0 * a->nz));
231   } else {
232     PetscCall(VecCopy_SeqViennaCL(yy, zz));
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A, MatAssemblyType mode) {
238   PetscFunctionBegin;
239   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
240   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
241   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
242   PetscFunctionReturn(0);
243 }
244 
245 /* --------------------------------------------------------------------------------*/
246 /*@C
247    MatCreateSeqAIJViennaCL - Creates a sparse matrix in `MATSEQAIJVIENNACL` (compressed row) format
248    (the default parallel PETSc format).  This matrix will ultimately be pushed down
249    to GPUs and use the ViennaCL library for calculations. For good matrix
250    assembly performance the user should preallocate the matrix storage by setting
251    the parameter nz (or the array nnz).  By setting these parameters accurately,
252    performance during matrix assembly can be increased substantially.
253 
254    Collective
255 
256    Input Parameters:
257 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
258 .  m - number of rows
259 .  n - number of columns
260 .  nz - number of nonzeros per row (same for all rows)
261 -  nnz - array containing the number of nonzeros in the various rows
262          (possibly different for each row) or NULL
263 
264    Output Parameter:
265 .  A - the matrix
266 
267    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
268    MatXXXXSetPreallocation() paradigm instead of this routine directly.
269    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
270 
271    Notes:
272    If nnz is given then nz is ignored
273 
274    The AIJ format, also called
275    compressed row storage, is fully compatible with standard Fortran 77
276    storage.  That is, the stored row and column indices can begin at
277    either one (as in Fortran) or zero.  See the users' manual for details.
278 
279    Specify the preallocated storage with either nz or nnz (not both).
280    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
281    allocation.  For large problems you MUST preallocate memory or you
282    will get TERRIBLE performance, see the users' manual chapter on matrices.
283 
284    Level: intermediate
285 
286 .seealso: `MATSEQAIJVIENNACL`, `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()`
287 @*/
288 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
289   PetscFunctionBegin;
290   PetscCall(MatCreate(comm, A));
291   PetscCall(MatSetSizes(*A, m, n, m, n));
292   PetscCall(MatSetType(*A, MATSEQAIJVIENNACL));
293   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, (PetscInt *)nnz));
294   PetscFunctionReturn(0);
295 }
296 
297 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) {
298   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL *)A->spptr;
299 
300   PetscFunctionBegin;
301   try {
302     if (viennaclcontainer) {
303       delete viennaclcontainer->tempvec;
304       delete viennaclcontainer->mat;
305       delete viennaclcontainer->compressed_mat;
306       delete viennaclcontainer;
307     }
308     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
309   } catch (std::exception const &ex) { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); }
310 
311   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", NULL));
312   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", NULL));
313   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", NULL));
314 
315   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
316   A->spptr = 0;
317   PetscCall(MatDestroy_SeqAIJ(A));
318   PetscFunctionReturn(0);
319 }
320 
321 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) {
322   PetscFunctionBegin;
323   PetscCall(MatCreate_SeqAIJ(B));
324   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B, MATSEQAIJVIENNACL, MAT_INPLACE_MATRIX, &B));
325   PetscFunctionReturn(0);
326 }
327 
328 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat, PetscBool);
329 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A, MatDuplicateOption cpvalues, Mat *B) {
330   Mat C;
331 
332   PetscFunctionBegin;
333   PetscCall(MatDuplicate_SeqAIJ(A, cpvalues, B));
334   C = *B;
335 
336   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
337   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
338 
339   C->spptr                                         = new Mat_SeqAIJViennaCL();
340   ((Mat_SeqAIJViennaCL *)C->spptr)->tempvec        = NULL;
341   ((Mat_SeqAIJViennaCL *)C->spptr)->mat            = NULL;
342   ((Mat_SeqAIJViennaCL *)C->spptr)->compressed_mat = NULL;
343 
344   PetscCall(PetscObjectChangeTypeName((PetscObject)C, MATSEQAIJVIENNACL));
345 
346   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
347 
348   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
349   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
350   PetscFunctionReturn(0);
351 }
352 
353 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
354   PetscFunctionBegin;
355   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
356   *array = ((Mat_SeqAIJ *)A->data)->a;
357   PetscFunctionReturn(0);
358 }
359 
360 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
361   PetscFunctionBegin;
362   A->offloadmask = PETSC_OFFLOAD_CPU;
363   *array         = NULL;
364   PetscFunctionReturn(0);
365 }
366 
367 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) {
368   PetscFunctionBegin;
369   PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
370   *array = ((Mat_SeqAIJ *)A->data)->a;
371   PetscFunctionReturn(0);
372 }
373 
374 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A, const PetscScalar *array[]) {
375   PetscFunctionBegin;
376   *array = NULL;
377   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
378   PetscFunctionReturn(0);
379 }
380 
381 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
382   PetscFunctionBegin;
383   *array = ((Mat_SeqAIJ *)A->data)->a;
384   PetscFunctionReturn(0);
385 }
386 
387 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A, PetscScalar *array[]) {
388   PetscFunctionBegin;
389   A->offloadmask = PETSC_OFFLOAD_CPU;
390   *array         = NULL;
391   PetscFunctionReturn(0);
392 }
393 
394 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A, PetscBool flg) {
395   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
396 
397   PetscFunctionBegin;
398   A->boundtocpu = flg;
399   if (flg && a->inode.size) {
400     a->inode.use = PETSC_TRUE;
401   } else {
402     a->inode.use = PETSC_FALSE;
403   }
404   if (flg) {
405     /* make sure we have an up-to-date copy on the CPU */
406     PetscCall(MatViennaCLCopyFromGPU(A, (const ViennaCLAIJMatrix *)NULL));
407     A->ops->mult        = MatMult_SeqAIJ;
408     A->ops->multadd     = MatMultAdd_SeqAIJ;
409     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
410     A->ops->duplicate   = MatDuplicate_SeqAIJ;
411     PetscCall(PetscMemzero(a->ops, sizeof(Mat_SeqAIJOps)));
412   } else {
413     A->ops->mult        = MatMult_SeqAIJViennaCL;
414     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
415     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
416     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
417     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
418 
419     a->ops->getarray          = MatSeqAIJGetArray_SeqAIJViennaCL;
420     a->ops->restorearray      = MatSeqAIJRestoreArray_SeqAIJViennaCL;
421     a->ops->getarrayread      = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
422     a->ops->restorearrayread  = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
423     a->ops->getarraywrite     = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
424     a->ops->restorearraywrite = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
430   Mat B;
431 
432   PetscFunctionBegin;
433   PetscCheck(reuse != MAT_REUSE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
434 
435   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, newmat));
436 
437   B = *newmat;
438 
439   B->spptr = new Mat_SeqAIJViennaCL();
440 
441   ((Mat_SeqAIJViennaCL *)B->spptr)->tempvec        = NULL;
442   ((Mat_SeqAIJViennaCL *)B->spptr)->mat            = NULL;
443   ((Mat_SeqAIJViennaCL *)B->spptr)->compressed_mat = NULL;
444 
445   PetscCall(MatBindToCPU_SeqAIJViennaCL(A, PETSC_FALSE));
446   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
447 
448   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJVIENNACL));
449   PetscCall(PetscFree(B->defaultvectype));
450   PetscCall(PetscStrallocpy(VECVIENNACL, &B->defaultvectype));
451 
452   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaij_seqaijviennacl_C", MatConvert_SeqAIJ_SeqAIJViennaCL));
453   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqdense_C", MatProductSetFromOptions_SeqAIJ_SeqDense));
454   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaijviennacl_seqaij_C", MatProductSetFromOptions_SeqAIJ));
455 
456   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
457 
458   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
459   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
460   PetscFunctionReturn(0);
461 }
462 
463 /*MC
464    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
465 
466    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
467    default. All matrix calculations are performed using the ViennaCL library.
468 
469    Options Database Keys:
470 +  -mat_type aijviennacl - sets the matrix type to `MATSEQAIJVIENNACL` during a call to `MatSetFromOptions()
471 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
472 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for `MatMult()` during a call to `MatSetFromOptions()`.
473 
474   Level: beginner
475 
476 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
477 M*/
478 
479 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void) {
480   PetscFunctionBegin;
481   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU, MatGetFactor_seqaij_petsc));
482   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY, MatGetFactor_seqaij_petsc));
483   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU, MatGetFactor_seqaij_petsc));
484   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC, MatGetFactor_seqaij_petsc));
485   PetscFunctionReturn(0);
486 }
487