xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 4ffacfe27a72f4cdf51b68a3bbb6aed96040fb2f)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
26 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
27 {
28   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30 
31   PetscFunctionBegin;
32   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
33     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
34       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0));
35 
36       try {
37         if (a->compressedrow.use) {
38           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
39 
40           // Since PetscInt is different from cl_uint, we have to convert:
41           viennacl::backend::mem_handle dummy;
42 
43           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
44           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
45             row_buffer.set(i, (a->compressedrow.i)[i]);
46 
47           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
49             row_indices.set(i, (a->compressedrow.rindex)[i]);
50 
51           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i=0; i<a->nz; ++i)
53             col_buffer.set(i, (a->j)[i]);
54 
55           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
56           PetscCall(PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar)));
57         } else {
58           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
59 
60           // Since PetscInt is in general different from cl_uint, we have to convert:
61           viennacl::backend::mem_handle dummy;
62 
63           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
64           for (PetscInt i=0; i<=A->rmap->n; ++i)
65             row_buffer.set(i, (a->i)[i]);
66 
67           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i=0; i<a->nz; ++i)
69             col_buffer.set(i, (a->j)[i]);
70 
71           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
72           PetscCall(PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar)));
73         }
74         ViennaCLWaitForGPU();
75       } catch(std::exception const & ex) {
76         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
77       }
78 
79       // Create temporary vector for v += A*x:
80       if (viennaclstruct->tempvec) {
81         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
82           delete (ViennaCLVector*)viennaclstruct->tempvec;
83           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
84         } else {
85           viennaclstruct->tempvec->clear();
86         }
87       } else {
88         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
89       }
90 
91       A->offloadmask = PETSC_OFFLOAD_BOTH;
92 
93       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0));
94     }
95   }
96   PetscFunctionReturn(0);
97 }
98 
99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
100 {
101   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
102   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
103   PetscInt           m  = A->rmap->n;
104 
105   PetscFunctionBegin;
106   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
107   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
108     try {
109       PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
110       PetscCheck((PetscInt)Agpu->size1() == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
111       a->nz           = Agpu->nnz();
112       a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
113       A->preallocated = PETSC_TRUE;
114       if (a->singlemalloc) {
115         if (a->a) PetscCall(PetscFree3(a->a,a->j,a->i));
116       } else {
117         if (a->i) PetscCall(PetscFree(a->i));
118         if (a->j) PetscCall(PetscFree(a->j));
119         if (a->a) PetscCall(PetscFree(a->a));
120       }
121       PetscCall(PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i));
122       PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt)));
123 
124       a->singlemalloc = PETSC_TRUE;
125 
126       /* Setup row lengths */
127       PetscCall(PetscFree(a->imax));
128       PetscCall(PetscFree(a->ilen));
129       PetscCall(PetscMalloc1(m,&a->imax));
130       PetscCall(PetscMalloc1(m,&a->ilen));
131       PetscCall(PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt)));
132 
133       /* Copy data back from GPU */
134       viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
135 
136       // copy row array
137       viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
138       (a->i)[0] = row_buffer[0];
139       for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
140         (a->i)[i+1] = row_buffer[i+1];
141         a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
142       }
143 
144       // copy column indices
145       viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
146       viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
147       for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
148         (a->j)[i] = col_buffer[i];
149 
150       // copy nonzero entries directly to destination (no conversion required)
151       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
152 
153       PetscCall(PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar))));
154       ViennaCLWaitForGPU();
155       /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
156     } catch(std::exception const & ex) {
157       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
158     }
159   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
160     PetscFunctionReturn(0);
161   } else {
162     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
163 
164     PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
165     if (!Agpu) {
166       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
167     } else {
168       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
169     }
170   }
171   A->offloadmask = PETSC_OFFLOAD_BOTH;
172   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
173   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
174   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
175   PetscFunctionReturn(0);
176 }
177 
178 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
179 {
180   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
181   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
182   const ViennaCLVector *xgpu=NULL;
183   ViennaCLVector       *ygpu=NULL;
184 
185   PetscFunctionBegin;
186   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
187   PetscCall(MatViennaCLCopyToGPU(A));
188   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
189     PetscCall(VecViennaCLGetArrayRead(xx,&xgpu));
190     PetscCall(VecViennaCLGetArrayWrite(yy,&ygpu));
191     PetscCall(PetscLogGpuTimeBegin());
192     try {
193       if (a->compressedrow.use) {
194         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
195       } else {
196         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
197       }
198       ViennaCLWaitForGPU();
199     } catch (std::exception const & ex) {
200       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
201     }
202     PetscCall(PetscLogGpuTimeEnd());
203     PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu));
204     PetscCall(VecViennaCLRestoreArrayWrite(yy,&ygpu));
205     PetscCall(PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt));
206   } else {
207     PetscCall(VecSet_SeqViennaCL(yy,0));
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
213 {
214   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
215   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
216   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
217   ViennaCLVector       *zgpu=NULL;
218 
219   PetscFunctionBegin;
220   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
221   PetscCall(MatViennaCLCopyToGPU(A));
222   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
223     try {
224       PetscCall(VecViennaCLGetArrayRead(xx,&xgpu));
225       PetscCall(VecViennaCLGetArrayRead(yy,&ygpu));
226       PetscCall(VecViennaCLGetArrayWrite(zz,&zgpu));
227       PetscCall(PetscLogGpuTimeBegin());
228       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
229       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
230       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
231       else *zgpu += *viennaclstruct->tempvec;
232       ViennaCLWaitForGPU();
233       PetscCall(PetscLogGpuTimeEnd());
234       PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu));
235       PetscCall(VecViennaCLRestoreArrayRead(yy,&ygpu));
236       PetscCall(VecViennaCLRestoreArrayWrite(zz,&zgpu));
237 
238     } catch(std::exception const & ex) {
239       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
240     }
241     PetscCall(PetscLogGpuFlops(2.0*a->nz));
242   } else {
243     PetscCall(VecCopy_SeqViennaCL(yy,zz));
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
249 {
250   PetscFunctionBegin;
251   PetscCall(MatAssemblyEnd_SeqAIJ(A,mode));
252   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
253   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
254   PetscFunctionReturn(0);
255 }
256 
257 /* --------------------------------------------------------------------------------*/
258 /*@C
259    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
260    (the default parallel PETSc format).  This matrix will ultimately be pushed down
261    to GPUs and use the ViennaCL library for calculations. For good matrix
262    assembly performance the user should preallocate the matrix storage by setting
263    the parameter nz (or the array nnz).  By setting these parameters accurately,
264    performance during matrix assembly can be increased substantially.
265 
266    Collective
267 
268    Input Parameters:
269 +  comm - MPI communicator, set to PETSC_COMM_SELF
270 .  m - number of rows
271 .  n - number of columns
272 .  nz - number of nonzeros per row (same for all rows)
273 -  nnz - array containing the number of nonzeros in the various rows
274          (possibly different for each row) or NULL
275 
276    Output Parameter:
277 .  A - the matrix
278 
279    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
280    MatXXXXSetPreallocation() paradigm instead of this routine directly.
281    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
282 
283    Notes:
284    If nnz is given then nz is ignored
285 
286    The AIJ format (also called the Yale sparse matrix format or
287    compressed row storage), is fully compatible with standard Fortran 77
288    storage.  That is, the stored row and column indices can begin at
289    either one (as in Fortran) or zero.  See the users' manual for details.
290 
291    Specify the preallocated storage with either nz or nnz (not both).
292    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
293    allocation.  For large problems you MUST preallocate memory or you
294    will get TERRIBLE performance, see the users' manual chapter on matrices.
295 
296    Level: intermediate
297 
298 .seealso: `MatCreate()`, `MatCreateAIJ()`, `MatCreateAIJCUSPARSE()`, `MatSetValues()`, `MatSeqAIJSetColumnIndices()`, `MatCreateSeqAIJWithArrays()`, `MatCreateAIJ()`
299 
300 @*/
301 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
302 {
303   PetscFunctionBegin;
304   PetscCall(MatCreate(comm,A));
305   PetscCall(MatSetSizes(*A,m,n,m,n));
306   PetscCall(MatSetType(*A,MATSEQAIJVIENNACL));
307   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz));
308   PetscFunctionReturn(0);
309 }
310 
311 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
312 {
313   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
314 
315   PetscFunctionBegin;
316   try {
317     if (viennaclcontainer) {
318       delete viennaclcontainer->tempvec;
319       delete viennaclcontainer->mat;
320       delete viennaclcontainer->compressed_mat;
321       delete viennaclcontainer;
322     }
323     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
324   } catch(std::exception const & ex) {
325     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
326   }
327 
328   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL));
329   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL));
330   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL));
331 
332   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
333   A->spptr = 0;
334   PetscCall(MatDestroy_SeqAIJ(A));
335   PetscFunctionReturn(0);
336 }
337 
338 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
339 {
340   PetscFunctionBegin;
341   PetscCall(MatCreate_SeqAIJ(B));
342   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B));
343   PetscFunctionReturn(0);
344 }
345 
346 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
347 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
348 {
349   Mat C;
350 
351   PetscFunctionBegin;
352   PetscCall(MatDuplicate_SeqAIJ(A,cpvalues,B));
353   C = *B;
354 
355   PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE));
356   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
357 
358   C->spptr = new Mat_SeqAIJViennaCL();
359   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
360   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
361   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
362 
363   PetscCall(PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL));
364 
365   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
366 
367   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
368   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
369   PetscFunctionReturn(0);
370 }
371 
372 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
373 {
374   PetscFunctionBegin;
375   PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
376   *array = ((Mat_SeqAIJ*)A->data)->a;
377   PetscFunctionReturn(0);
378 }
379 
380 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
381 {
382   PetscFunctionBegin;
383   A->offloadmask = PETSC_OFFLOAD_CPU;
384   *array         = NULL;
385   PetscFunctionReturn(0);
386 }
387 
388 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[])
389 {
390   PetscFunctionBegin;
391   PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
392   *array = ((Mat_SeqAIJ*)A->data)->a;
393   PetscFunctionReturn(0);
394 }
395 
396 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[])
397 {
398   PetscFunctionBegin;
399   *array = NULL;
400   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
401   PetscFunctionReturn(0);
402 }
403 
404 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[])
405 {
406   PetscFunctionBegin;
407   *array = ((Mat_SeqAIJ*)A->data)->a;
408   PetscFunctionReturn(0);
409 }
410 
411 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[])
412 {
413   PetscFunctionBegin;
414   A->offloadmask = PETSC_OFFLOAD_CPU;
415   *array         = NULL;
416   PetscFunctionReturn(0);
417 }
418 
419 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
420 {
421   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
422 
423   PetscFunctionBegin;
424   A->boundtocpu  = flg;
425   if (flg && a->inode.size) {
426     a->inode.use = PETSC_TRUE;
427   } else {
428     a->inode.use = PETSC_FALSE;
429   }
430   if (flg) {
431     /* make sure we have an up-to-date copy on the CPU */
432     PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
433     A->ops->mult        = MatMult_SeqAIJ;
434     A->ops->multadd     = MatMultAdd_SeqAIJ;
435     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
436     A->ops->duplicate   = MatDuplicate_SeqAIJ;
437     PetscCall(PetscMemzero(a->ops,sizeof(Mat_SeqAIJOps)));
438   } else {
439     A->ops->mult        = MatMult_SeqAIJViennaCL;
440     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
441     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
442     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
443     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
444 
445     a->ops->getarray           = MatSeqAIJGetArray_SeqAIJViennaCL;
446     a->ops->restorearray       = MatSeqAIJRestoreArray_SeqAIJViennaCL;
447     a->ops->getarrayread       = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
448     a->ops->restorearrayread   = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
449     a->ops->getarraywrite      = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
450     a->ops->restorearraywrite  = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
451   }
452   PetscFunctionReturn(0);
453 }
454 
455 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
456 {
457   Mat B;
458 
459   PetscFunctionBegin;
460   PetscCheck(reuse != MAT_REUSE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
461 
462   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,newmat));
463 
464   B = *newmat;
465 
466   B->spptr = new Mat_SeqAIJViennaCL();
467 
468   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
469   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
470   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
471 
472   PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE));
473   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
474 
475   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL));
476   PetscCall(PetscFree(B->defaultvectype));
477   PetscCall(PetscStrallocpy(VECVIENNACL,&B->defaultvectype));
478 
479   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL));
480   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense));
481   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ));
482 
483   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
484 
485   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
486   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
487   PetscFunctionReturn(0);
488 }
489 
490 /*MC
491    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
492 
493    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
494    default. All matrix calculations are performed using the ViennaCL library.
495 
496    Options Database Keys:
497 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
498 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
499 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
500 
501   Level: beginner
502 
503 .seealso: `MatCreateSeqAIJViennaCL()`, `MATAIJVIENNACL`, `MatCreateAIJViennaCL()`
504 M*/
505 
506 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
507 {
508   PetscFunctionBegin;
509   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc));
510   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc));
511   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc));
512   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc));
513   PetscFunctionReturn(0);
514 }
515