xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
26 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
27 {
28   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30 
31   PetscFunctionBegin;
32   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
33     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
34       PetscCall(PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0));
35 
36       try {
37         if (a->compressedrow.use) {
38           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
39 
40           // Since PetscInt is different from cl_uint, we have to convert:
41           viennacl::backend::mem_handle dummy;
42 
43           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
44           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
45             row_buffer.set(i, (a->compressedrow.i)[i]);
46 
47           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
48           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
49             row_indices.set(i, (a->compressedrow.rindex)[i]);
50 
51           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
52           for (PetscInt i=0; i<a->nz; ++i)
53             col_buffer.set(i, (a->j)[i]);
54 
55           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
56           PetscCall(PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar)));
57         } else {
58           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
59 
60           // Since PetscInt is in general different from cl_uint, we have to convert:
61           viennacl::backend::mem_handle dummy;
62 
63           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
64           for (PetscInt i=0; i<=A->rmap->n; ++i)
65             row_buffer.set(i, (a->i)[i]);
66 
67           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
68           for (PetscInt i=0; i<a->nz; ++i)
69             col_buffer.set(i, (a->j)[i]);
70 
71           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
72           PetscCall(PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar)));
73         }
74         ViennaCLWaitForGPU();
75       } catch(std::exception const & ex) {
76         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
77       }
78 
79       // Create temporary vector for v += A*x:
80       if (viennaclstruct->tempvec) {
81         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
82           delete (ViennaCLVector*)viennaclstruct->tempvec;
83           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
84         } else {
85           viennaclstruct->tempvec->clear();
86         }
87       } else {
88         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
89       }
90 
91       A->offloadmask = PETSC_OFFLOAD_BOTH;
92 
93       PetscCall(PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0));
94     }
95   }
96   PetscFunctionReturn(0);
97 }
98 
99 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
100 {
101   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
102   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
103   PetscInt           m  = A->rmap->n;
104 
105   PetscFunctionBegin;
106   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
107   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
108     try {
109       PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
110       else {
111 
112         PetscCheck((PetscInt)Agpu->size1() == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %lu rows, should be %" PetscInt_FMT, Agpu->size1(), m);
113         a->nz           = Agpu->nnz();
114         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
115         A->preallocated = PETSC_TRUE;
116         if (a->singlemalloc) {
117           if (a->a) PetscCall(PetscFree3(a->a,a->j,a->i));
118         } else {
119           if (a->i) PetscCall(PetscFree(a->i));
120           if (a->j) PetscCall(PetscFree(a->j));
121           if (a->a) PetscCall(PetscFree(a->a));
122         }
123         PetscCall(PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i));
124         PetscCall(PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt)));
125 
126         a->singlemalloc = PETSC_TRUE;
127 
128         /* Setup row lengths */
129         PetscCall(PetscFree(a->imax));
130         PetscCall(PetscFree(a->ilen));
131         PetscCall(PetscMalloc1(m,&a->imax));
132         PetscCall(PetscMalloc1(m,&a->ilen));
133         PetscCall(PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt)));
134 
135         /* Copy data back from GPU */
136         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
137 
138         // copy row array
139         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
140         (a->i)[0] = row_buffer[0];
141         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
142           (a->i)[i+1] = row_buffer[i+1];
143           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
144         }
145 
146         // copy column indices
147         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
148         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
149         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
150           (a->j)[i] = col_buffer[i];
151 
152         // copy nonzero entries directly to destination (no conversion required)
153         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
154 
155         PetscCall(PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar))));
156         ViennaCLWaitForGPU();
157         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
158       }
159     } catch(std::exception const & ex) {
160       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
161     }
162   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
163     PetscFunctionReturn(0);
164   } else {
165     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
166 
167     PetscCheck(!a->compressedrow.use,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
168     if (!Agpu) {
169       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
170     } else {
171       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
172     }
173   }
174   A->offloadmask = PETSC_OFFLOAD_BOTH;
175   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
176   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
177   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
178   PetscFunctionReturn(0);
179 }
180 
181 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
182 {
183   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
184   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
185   const ViennaCLVector *xgpu=NULL;
186   ViennaCLVector       *ygpu=NULL;
187 
188   PetscFunctionBegin;
189   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
190   PetscCall(MatViennaCLCopyToGPU(A));
191   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
192     PetscCall(VecViennaCLGetArrayRead(xx,&xgpu));
193     PetscCall(VecViennaCLGetArrayWrite(yy,&ygpu));
194     PetscCall(PetscLogGpuTimeBegin());
195     try {
196       if (a->compressedrow.use) {
197         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
198       } else {
199         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
200       }
201       ViennaCLWaitForGPU();
202     } catch (std::exception const & ex) {
203       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
204     }
205     PetscCall(PetscLogGpuTimeEnd());
206     PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu));
207     PetscCall(VecViennaCLRestoreArrayWrite(yy,&ygpu));
208     PetscCall(PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt));
209   } else {
210     PetscCall(VecSet_SeqViennaCL(yy,0));
211   }
212   PetscFunctionReturn(0);
213 }
214 
215 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
216 {
217   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
218   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
219   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
220   ViennaCLVector       *zgpu=NULL;
221 
222   PetscFunctionBegin;
223   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
224   PetscCall(MatViennaCLCopyToGPU(A));
225   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
226     try {
227       PetscCall(VecViennaCLGetArrayRead(xx,&xgpu));
228       PetscCall(VecViennaCLGetArrayRead(yy,&ygpu));
229       PetscCall(VecViennaCLGetArrayWrite(zz,&zgpu));
230       PetscCall(PetscLogGpuTimeBegin());
231       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
232       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
233       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
234       else *zgpu += *viennaclstruct->tempvec;
235       ViennaCLWaitForGPU();
236       PetscCall(PetscLogGpuTimeEnd());
237       PetscCall(VecViennaCLRestoreArrayRead(xx,&xgpu));
238       PetscCall(VecViennaCLRestoreArrayRead(yy,&ygpu));
239       PetscCall(VecViennaCLRestoreArrayWrite(zz,&zgpu));
240 
241     } catch(std::exception const & ex) {
242       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
243     }
244     PetscCall(PetscLogGpuFlops(2.0*a->nz));
245   } else {
246     PetscCall(VecCopy_SeqViennaCL(yy,zz));
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
252 {
253   PetscFunctionBegin;
254   PetscCall(MatAssemblyEnd_SeqAIJ(A,mode));
255   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
256   if (!A->boundtocpu) PetscCall(MatViennaCLCopyToGPU(A));
257   PetscFunctionReturn(0);
258 }
259 
260 /* --------------------------------------------------------------------------------*/
261 /*@C
262    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
263    (the default parallel PETSc format).  This matrix will ultimately be pushed down
264    to GPUs and use the ViennaCL library for calculations. For good matrix
265    assembly performance the user should preallocate the matrix storage by setting
266    the parameter nz (or the array nnz).  By setting these parameters accurately,
267    performance during matrix assembly can be increased substantially.
268 
269    Collective
270 
271    Input Parameters:
272 +  comm - MPI communicator, set to PETSC_COMM_SELF
273 .  m - number of rows
274 .  n - number of columns
275 .  nz - number of nonzeros per row (same for all rows)
276 -  nnz - array containing the number of nonzeros in the various rows
277          (possibly different for each row) or NULL
278 
279    Output Parameter:
280 .  A - the matrix
281 
282    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
283    MatXXXXSetPreallocation() paradigm instead of this routine directly.
284    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
285 
286    Notes:
287    If nnz is given then nz is ignored
288 
289    The AIJ format (also called the Yale sparse matrix format or
290    compressed row storage), is fully compatible with standard Fortran 77
291    storage.  That is, the stored row and column indices can begin at
292    either one (as in Fortran) or zero.  See the users' manual for details.
293 
294    Specify the preallocated storage with either nz or nnz (not both).
295    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
296    allocation.  For large problems you MUST preallocate memory or you
297    will get TERRIBLE performance, see the users' manual chapter on matrices.
298 
299    Level: intermediate
300 
301 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
302 
303 @*/
304 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
305 {
306   PetscFunctionBegin;
307   PetscCall(MatCreate(comm,A));
308   PetscCall(MatSetSizes(*A,m,n,m,n));
309   PetscCall(MatSetType(*A,MATSEQAIJVIENNACL));
310   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz));
311   PetscFunctionReturn(0);
312 }
313 
314 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
315 {
316   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
317 
318   PetscFunctionBegin;
319   try {
320     if (viennaclcontainer) {
321       delete viennaclcontainer->tempvec;
322       delete viennaclcontainer->mat;
323       delete viennaclcontainer->compressed_mat;
324       delete viennaclcontainer;
325     }
326     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
327   } catch(std::exception const & ex) {
328     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
329   }
330 
331   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL));
332   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL));
333   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL));
334 
335   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
336   A->spptr = 0;
337   PetscCall(MatDestroy_SeqAIJ(A));
338   PetscFunctionReturn(0);
339 }
340 
341 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
342 {
343   PetscFunctionBegin;
344   PetscCall(MatCreate_SeqAIJ(B));
345   PetscCall(MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B));
346   PetscFunctionReturn(0);
347 }
348 
349 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
350 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
351 {
352   Mat C;
353 
354   PetscFunctionBegin;
355   PetscCall(MatDuplicate_SeqAIJ(A,cpvalues,B));
356   C = *B;
357 
358   PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE));
359   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
360 
361   C->spptr = new Mat_SeqAIJViennaCL();
362   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
363   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
364   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
365 
366   PetscCall(PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL));
367 
368   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
369 
370   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
371   if (C->assembled) PetscCall(MatViennaCLCopyToGPU(C));
372   PetscFunctionReturn(0);
373 }
374 
375 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
376 {
377   PetscFunctionBegin;
378   PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
379   *array = ((Mat_SeqAIJ*)A->data)->a;
380   PetscFunctionReturn(0);
381 }
382 
383 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
384 {
385   PetscFunctionBegin;
386   A->offloadmask = PETSC_OFFLOAD_CPU;
387   *array         = NULL;
388   PetscFunctionReturn(0);
389 }
390 
391 static PetscErrorCode MatSeqAIJGetArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[])
392 {
393   PetscFunctionBegin;
394   PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
395   *array = ((Mat_SeqAIJ*)A->data)->a;
396   PetscFunctionReturn(0);
397 }
398 
399 static PetscErrorCode MatSeqAIJRestoreArrayRead_SeqAIJViennaCL(Mat A,const PetscScalar *array[])
400 {
401   PetscFunctionBegin;
402   *array = NULL;
403   /* No A->offloadmask = PETSC_OFFLOAD_CPU since if A->offloadmask was PETSC_OFFLOAD_BOTH, it is still BOTH */
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode MatSeqAIJGetArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[])
408 {
409   PetscFunctionBegin;
410   *array = ((Mat_SeqAIJ*)A->data)->a;
411   PetscFunctionReturn(0);
412 }
413 
414 static PetscErrorCode MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL(Mat A,PetscScalar *array[])
415 {
416   PetscFunctionBegin;
417   A->offloadmask = PETSC_OFFLOAD_CPU;
418   *array         = NULL;
419   PetscFunctionReturn(0);
420 }
421 
422 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
423 {
424   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
425 
426   PetscFunctionBegin;
427   A->boundtocpu  = flg;
428   if (flg && a->inode.size) {
429     a->inode.use = PETSC_TRUE;
430   } else {
431     a->inode.use = PETSC_FALSE;
432   }
433   if (flg) {
434     /* make sure we have an up-to-date copy on the CPU */
435     PetscCall(MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL));
436     A->ops->mult        = MatMult_SeqAIJ;
437     A->ops->multadd     = MatMultAdd_SeqAIJ;
438     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
439     A->ops->duplicate   = MatDuplicate_SeqAIJ;
440     PetscCall(PetscMemzero(a->ops,sizeof(Mat_SeqAIJOps)));
441   } else {
442     A->ops->mult        = MatMult_SeqAIJViennaCL;
443     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
444     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
445     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
446     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
447 
448     a->ops->getarray           = MatSeqAIJGetArray_SeqAIJViennaCL;
449     a->ops->restorearray       = MatSeqAIJRestoreArray_SeqAIJViennaCL;
450     a->ops->getarrayread       = MatSeqAIJGetArrayRead_SeqAIJViennaCL;
451     a->ops->restorearrayread   = MatSeqAIJRestoreArrayRead_SeqAIJViennaCL;
452     a->ops->getarraywrite      = MatSeqAIJGetArrayWrite_SeqAIJViennaCL;
453     a->ops->restorearraywrite  = MatSeqAIJRestoreArrayWrite_SeqAIJViennaCL;
454   }
455   PetscFunctionReturn(0);
456 }
457 
458 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
459 {
460   Mat B;
461 
462   PetscFunctionBegin;
463   PetscCheck(reuse != MAT_REUSE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
464 
465   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,newmat));
466 
467   B = *newmat;
468 
469   B->spptr = new Mat_SeqAIJViennaCL();
470 
471   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
472   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
473   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
474 
475   PetscCall(MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE));
476   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
477 
478   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL));
479   PetscCall(PetscFree(B->defaultvectype));
480   PetscCall(PetscStrallocpy(VECVIENNACL,&B->defaultvectype));
481 
482   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL));
483   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense));
484   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ));
485 
486   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
487 
488   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
489   if (B->assembled) PetscCall(MatViennaCLCopyToGPU(B));
490   PetscFunctionReturn(0);
491 }
492 
493 /*MC
494    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
495 
496    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
497    default. All matrix calculations are performed using the ViennaCL library.
498 
499    Options Database Keys:
500 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
501 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
502 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
503 
504   Level: beginner
505 
506 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
507 M*/
508 
509 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
510 {
511   PetscFunctionBegin;
512   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc));
513   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc));
514   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc));
515   PetscCall(MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc));
516   PetscFunctionReturn(0);
517 }
518