xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision ea500dcf30ffef220839d8a7e6a1237a802d17f4)
1 
2 /*
3     Defines the basic matrix operations for the AIJ (compressed row)
4   matrix storage format.
5 */
6 
7 #include <petscconf.h>
8 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 #include <algorithm>
17 #include <vector>
18 #include <string>
19 
20 #include "viennacl/linalg/prod.hpp"
21 
22 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
23 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
24 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);
25 
/*
   MatViennaCLCopyToGPU - Pushes the host (CPU) CSR data of a SEQAIJVIENNACL matrix
   to its ViennaCL (GPU) representation if the device copy is missing or stale.

   Input Parameter:
.  A - the SEQAIJVIENNACL matrix

   Notes:
   No-op unless A->offloadmask is PETSC_OFFLOAD_UNALLOCATED or PETSC_OFFLOAD_CPU;
   on completion the mask is set to PETSC_OFFLOAD_BOTH.  ViennaCL stores indices as
   cl_uint, which in general differs from PetscInt, so the row/column index arrays
   are converted element-by-element through typesafe_host_array staging buffers.
   The scalar values (a->a) are handed over without conversion.
*/
PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);

      try {
        if (a->compressedrow.use) {
          /* Compressed-row path: only the a->compressedrow.nrows rows that contain
             nonzeros are stored, together with their original row indices (rindex). */
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;  /* unattached handle; only determines the staging-buffer layout in raw_resize() */

          /* row pointer array (length nrows+1) */
          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
          for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
            row_buffer.set(i, (a->compressedrow.i)[i]);

          /* original (uncompressed) row index of each stored row */
          viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
            row_indices.set(i, (a->compressedrow.rindex)[i]);

          /* column index of every nonzero */
          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        } else {
          /* Plain CSR path: full row-pointer array of length m+1 */
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
          for (PetscInt i=0; i<=A->rmap->n; ++i)
            row_buffer.set(i, (a->i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        }
        ViennaCLWaitForGPU();  /* ensure the transfer has completed before we log/return */
      } catch(std::exception const & ex) {
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      /* tempvec is reused by MatMultAdd; recreate only if the row dimension changed */
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          delete (ViennaCLVector*)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
99 
/*
   MatViennaCLCopyFromGPU - Pulls matrix data from a ViennaCL (GPU) matrix back
   into the host (CPU) SeqAIJ representation.

   Input Parameters:
+  A    - the SEQAIJVIENNACL matrix
-  Agpu - the device matrix to copy from, or NULL to copy from A's own device copy

   Notes:
   Behavior depends on A->offloadmask:
   - PETSC_OFFLOAD_BOTH: host already current, nothing to do.
   - PETSC_OFFLOAD_UNALLOCATED with Agpu given: the host CSR arrays are (re)allocated
     and the full structure (row pointers, column indices, values) is rebuilt from Agpu.
   - otherwise: only the nonzero values are copied back, i.e. the sparsity pattern on
     the host is assumed unchanged; with Agpu == NULL this happens only when the device
     copy is authoritative (PETSC_OFFLOAD_GPU).
   Row compression is not supported for device-resident matrices.
*/
PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
  PetscInt           m  = A->rmap->n;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    /* No host copy exists yet: rebuild the entire CSR structure from Agpu */
    try {
      if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      else {

        if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
        a->nz           = Agpu->nnz();
        a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
        A->preallocated = PETSC_TRUE;
        /* release any previous host storage, honoring how it was allocated */
        if (a->singlemalloc) {
          if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
        } else {
          if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
          if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
          if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
        }
        ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);

        a->singlemalloc = PETSC_TRUE;

        /* Setup row lengths */
        ierr = PetscFree(a->imax);CHKERRQ(ierr);
        ierr = PetscFree(a->ilen);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);

        /* Copy data back from GPU; indices come back as unsigned int and are
           widened to PetscInt on the host */
        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

        // copy row array
        viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
        (a->i)[0] = row_buffer[0];
        for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
          (a->i)[i+1] = row_buffer[i+1];
          a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
        }

        // copy column indices
        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
        viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
        for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
          (a->j)[i] = col_buffer[i];

        // copy nonzero entries directly to destination (no conversion required)
        viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);

        ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
        ViennaCLWaitForGPU();
        /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
      }
    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    /* nothing allocated anywhere and no source supplied: nothing to copy */
    PetscFunctionReturn(0);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);

    if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    /* structure already present on the host: copy only the values */
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
182 
183 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
184 {
185   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
186   PetscErrorCode       ierr;
187   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
188   const ViennaCLVector *xgpu=NULL;
189   ViennaCLVector       *ygpu=NULL;
190 
191   PetscFunctionBegin;
192   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
193   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
194   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
195     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
196     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
197     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
198     try {
199       if (a->compressedrow.use) {
200         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
201       } else {
202         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
203       }
204       ViennaCLWaitForGPU();
205     } catch (std::exception const & ex) {
206       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
207     }
208     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
209     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
210     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
211     ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
212   } else {
213     ierr = VecSet_SeqViennaCL(yy,0);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
/*
   MatMultAdd_SeqAIJViennaCL - Computes zz = yy + A*xx using ViennaCL on the GPU.

   The product A*xx is first formed in viennaclstruct->tempvec (sized/allocated by
   MatViennaCLCopyToGPU) and then combined with yy; when zz aliases yy the addition
   is done in place.  For an empty matrix the result is just a copy of yy.
*/
PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode       ierr;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
  ViennaCLVector       *zgpu=NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
      /* NOTE(review): zz is obtained with GetArrayWrite but is *read* below when
         zz == yy (the "+=" branch); that read is only valid because the preceding
         GetArrayRead(yy) refers to the same vector -- confirm this get/restore
         ordering is preserved if this routine is ever modified. */
      ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
      /* tempvec <- A*x, using whichever device matrix matches the host storage */
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      /* z <- y + tempvec, in place when z aliases y */
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);

    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
    }
    ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
  } else {
    /* empty matrix: A*x == 0, so z is just a copy of y */
    ierr = VecCopy_SeqViennaCL(yy,zz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
254 
255 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
256 {
257   PetscErrorCode ierr;
258 
259   PetscFunctionBegin;
260   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
261   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
262   if (!A->boundtocpu) {
263     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
264   }
265   PetscFunctionReturn(0);
266 }
267 
268 /* --------------------------------------------------------------------------------*/
269 /*@C
270    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
271    (the default parallel PETSc format).  This matrix will ultimately be pushed down
272    to GPUs and use the ViennaCL library for calculations. For good matrix
273    assembly performance the user should preallocate the matrix storage by setting
274    the parameter nz (or the array nnz).  By setting these parameters accurately,
275    performance during matrix assembly can be increased substantially.
276 
277    Collective
278 
279    Input Parameters:
280 +  comm - MPI communicator, set to PETSC_COMM_SELF
281 .  m - number of rows
282 .  n - number of columns
283 .  nz - number of nonzeros per row (same for all rows)
284 -  nnz - array containing the number of nonzeros in the various rows
285          (possibly different for each row) or NULL
286 
287    Output Parameter:
288 .  A - the matrix
289 
290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
293 
294    Notes:
295    If nnz is given then nz is ignored
296 
297    The AIJ format (also called the Yale sparse matrix format or
298    compressed row storage), is fully compatible with standard Fortran 77
299    storage.  That is, the stored row and column indices can begin at
300    either one (as in Fortran) or zero.  See the users' manual for details.
301 
302    Specify the preallocated storage with either nz or nnz (not both).
303    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
304    allocation.  For large problems you MUST preallocate memory or you
305    will get TERRIBLE performance, see the users' manual chapter on matrices.
306 
307    Level: intermediate
308 
309 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
310 
311 @*/
312 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
313 {
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   ierr = MatCreate(comm,A);CHKERRQ(ierr);
318   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
319   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
320   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
325 {
326   PetscErrorCode ierr;
327   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
328 
329   PetscFunctionBegin;
330   try {
331     if (viennaclcontainer) {
332       delete viennaclcontainer->tempvec;
333       delete viennaclcontainer->mat;
334       delete viennaclcontainer->compressed_mat;
335       delete viennaclcontainer;
336     }
337     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
338   } catch(std::exception const & ex) {
339     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
340   }
341 
342   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
343   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL);CHKERRQ(ierr);
344   ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL);CHKERRQ(ierr);
345 
346   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
347   A->spptr = 0;
348   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
353 {
354   PetscErrorCode ierr;
355 
356   PetscFunctionBegin;
357   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
358   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
/*
   MatDuplicate_SeqAIJViennaCL - Duplicates a SEQAIJVIENNACL matrix: the host data
   is copied by MatDuplicate_SeqAIJ, a fresh (empty) ViennaCL container is attached
   to the copy, and the copy is pushed to the GPU if the source was assembled.
*/
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
  C = *B;

  /* NOTE(review): this binds the *source* matrix A, not the new matrix C; only
     C's bindtocpu pointer is set explicitly below -- confirm that A (rather than
     C) is the intended target of this call. */
  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  /* fresh, empty device-side container; populated by MatViennaCLCopyToGPU() */
  C->spptr = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;

  ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) {
    ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
391 
392 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
393 {
394   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
395   PetscErrorCode ierr;
396 
397   PetscFunctionBegin;
398   ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
399   *array = a->a;
400   A->offloadmask = PETSC_OFFLOAD_CPU;
401   PetscFunctionReturn(0);
402 }
403 
404 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
405 {
406   PetscFunctionBegin;
407   *array = NULL;
408   PetscFunctionReturn(0);
409 }
410 
411 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
412 {
413   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)A->data;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   A->boundtocpu  = flg;
418   if (flg && aij->inode.size) {
419     aij->inode.use = PETSC_TRUE;
420   } else {
421     aij->inode.use = PETSC_FALSE;
422   }
423   if (flg) {
424     /* make sure we have an up-to-date copy on the CPU */
425     ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
426     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
427     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);
428 
429     A->ops->mult        = MatMult_SeqAIJ;
430     A->ops->multadd     = MatMultAdd_SeqAIJ;
431     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
432     A->ops->duplicate   = MatDuplicate_SeqAIJ;
433   } else {
434     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
435     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);
436 
437     A->ops->mult        = MatMult_SeqAIJViennaCL;
438     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
439     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
440     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
441     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
442   }
443   PetscFunctionReturn(0);
444 }
445 
/*
   MatConvert_SeqAIJ_SeqAIJViennaCL - Converts a SeqAIJ matrix to the
   SEQAIJVIENNACL type.

   Input Parameters:
+  A     - the source SeqAIJ matrix
.  type  - target type name (unused here; required by the MatConvert interface)
-  reuse - MAT_INITIAL_MATRIX (duplicate A first) or MAT_INPLACE_MATRIX
           (*newmat is A itself); MAT_REUSE_MATRIX is rejected

   Output Parameter:
.  newmat - the converted matrix
*/
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;

  if (reuse == MAT_REUSE_MATRIX) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  }

  B = *newmat;

  /* fresh, empty device-side container; populated lazily by MatViennaCLCopyToGPU() */
  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;

  /* NOTE(review): the bind and the function compositions below act on A, not B.
     For MAT_INPLACE_MATRIX A == B and that is clearly correct; for
     MAT_INITIAL_MATRIX the new matrix B is not touched by these calls -- confirm
     this asymmetry is intended. */
  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
  /* ViennaCL vectors are the natural companion type for this matrix */
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ);CHKERRQ(ierr);

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) {
    ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
487 
488 /*MC
489    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
490 
   A matrix type whose data resides on GPUs. These matrices are in CSR format by
492    default. All matrix calculations are performed using the ViennaCL library.
493 
494    Options Database Keys:
495 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
496 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
497 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
498 
499   Level: beginner
500 
501 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
502 M*/
503 
504 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
505 {
506   PetscErrorCode ierr;
507 
508   PetscFunctionBegin;
509   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
510   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
511   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
512   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515