xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 4cdfd227e2bdf8feb38b6d90cfcac175440fcd00)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
10 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
11 #include <petscbt.h>
12 #include <../src/vec/vec/impls/dvecimpl.h>
13 #include <petsc/private/vecimpl.h>
14 
15 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
16 
17 
18 #include <algorithm>
19 #include <vector>
20 #include <string>
21 
22 #include "viennacl/linalg/prod.hpp"
23 
24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
25 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
26 
27 
28 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
29 {
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
60         } else {
61           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62 
63           // Since PetscInt is in general different from cl_uint, we have to convert:
64           viennacl::backend::mem_handle dummy;
65 
66           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67           for (PetscInt i=0; i<=A->rmap->n; ++i)
68             row_buffer.set(i, (a->i)[i]);
69 
70           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71           for (PetscInt i=0; i<a->nz; ++i)
72             col_buffer.set(i, (a->j)[i]);
73 
74           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
76         }
77         ViennaCLWaitForGPU();
78       } catch(std::exception const & ex) {
79         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
80       }
81 
82       // Create temporary vector for v += A*x:
83       if (viennaclstruct->tempvec) {
84         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
85           delete (ViennaCLVector*)viennaclstruct->tempvec;
86           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
87         } else {
88           viennaclstruct->tempvec->clear();
89         }
90       } else {
91         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
92       }
93 
94       A->offloadmask = PETSC_OFFLOAD_BOTH;
95 
96       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
103 {
104   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
105   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
106   PetscInt           m  = A->rmap->n;
107   PetscErrorCode     ierr;
108 
109   PetscFunctionBegin;
110   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
111   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
112     try {
113       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
114       else {
115 
116         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
117         a->nz           = Agpu->nnz();
118         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
119         A->preallocated = PETSC_TRUE;
120         if (a->singlemalloc) {
121           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
122         } else {
123           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
124           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
125           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
126         }
127         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
128         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
129 
130         a->singlemalloc = PETSC_TRUE;
131 
132         /* Setup row lengths */
133         ierr = PetscFree(a->imax);CHKERRQ(ierr);
134         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
135         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
136         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
137         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
138 
139         /* Copy data back from GPU */
140         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
141 
142         // copy row array
143         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
144         (a->i)[0] = row_buffer[0];
145         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
146           (a->i)[i+1] = row_buffer[i+1];
147           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
148         }
149 
150         // copy column indices
151         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
152         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
153         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
154           (a->j)[i] = col_buffer[i];
155 
156         // copy nonzero entries directly to destination (no conversion required)
157         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
158 
159         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
160         ViennaCLWaitForGPU();
161         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
162       }
163     } catch(std::exception const & ex) {
164       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
165     }
166   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
167     PetscFunctionReturn(0);
168   } else {
169     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
170 
171     if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
172     if (!Agpu) {
173       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
174     } else {
175       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
176     }
177   }
178   A->offloadmask = PETSC_OFFLOAD_BOTH;
179   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
180   ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
181   ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
186 {
187   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
188   PetscErrorCode       ierr;
189   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
190   const ViennaCLVector *xgpu=NULL;
191   ViennaCLVector       *ygpu=NULL;
192 
193   PetscFunctionBegin;
194   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
195     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
196     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
197     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
198     try {
199       if (a->compressedrow.use) {
200         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
201       } else {
202         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
203       }
204       ViennaCLWaitForGPU();
205     } catch (std::exception const & ex) {
206       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
207     }
208     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
209     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
210     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
211     ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
212   } else {
213     ierr = VecSet(yy,0);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
219 {
220   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
221   PetscErrorCode       ierr;
222   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
223   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
224   ViennaCLVector       *zgpu=NULL;
225 
226   PetscFunctionBegin;
227   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
228     try {
229       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
230       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
231       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
232       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
233       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
234       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
235       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
236       else *zgpu += *viennaclstruct->tempvec;
237       ViennaCLWaitForGPU();
238       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
239       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
240       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
241       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
242 
243     } catch(std::exception const & ex) {
244       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
245     }
246     ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
247   } else {
248     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
254 {
255   PetscErrorCode ierr;
256 
257   PetscFunctionBegin;
258   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
259   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
260   if (!A->pinnedtocpu) {
261     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
262   }
263   PetscFunctionReturn(0);
264 }
265 
266 /* --------------------------------------------------------------------------------*/
267 /*@
268    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
269    (the default parallel PETSc format).  This matrix will ultimately be pushed down
270    to GPUs and use the ViennaCL library for calculations. For good matrix
271    assembly performance the user should preallocate the matrix storage by setting
272    the parameter nz (or the array nnz).  By setting these parameters accurately,
273    performance during matrix assembly can be increased substantially.
274 
275 
276    Collective
277 
278    Input Parameters:
279 +  comm - MPI communicator, set to PETSC_COMM_SELF
280 .  m - number of rows
281 .  n - number of columns
282 .  nz - number of nonzeros per row (same for all rows)
283 -  nnz - array containing the number of nonzeros in the various rows
284          (possibly different for each row) or NULL
285 
286    Output Parameter:
287 .  A - the matrix
288 
289    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
290    MatXXXXSetPreallocation() paradigm instead of this routine directly.
291    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
292 
293    Notes:
294    If nnz is given then nz is ignored
295 
296    The AIJ format (also called the Yale sparse matrix format or
297    compressed row storage), is fully compatible with standard Fortran 77
298    storage.  That is, the stored row and column indices can begin at
299    either one (as in Fortran) or zero.  See the users' manual for details.
300 
301    Specify the preallocated storage with either nz or nnz (not both).
302    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
303    allocation.  For large problems you MUST preallocate memory or you
304    will get TERRIBLE performance, see the users' manual chapter on matrices.
305 
306    Level: intermediate
307 
308 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
309 
310 @*/
311 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
312 {
313   PetscErrorCode ierr;
314 
315   PetscFunctionBegin;
316   ierr = MatCreate(comm,A);CHKERRQ(ierr);
317   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
318   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
319   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 
324 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
325 {
326   PetscErrorCode ierr;
327   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
328 
329   PetscFunctionBegin;
330   try {
331     if (viennaclcontainer) {
332       delete viennaclcontainer->tempvec;
333       delete viennaclcontainer->mat;
334       delete viennaclcontainer->compressed_mat;
335       delete viennaclcontainer;
336     }
337     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
338   } catch(std::exception const & ex) {
339     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
340   }
341 
342   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
343 
344   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
345   A->spptr = 0;
346   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 
351 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
352 {
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
357   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
358   PetscFunctionReturn(0);
359 }
360 
361 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
362 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
363 {
364   PetscErrorCode ierr;
365   Mat            C;
366 
367   PetscFunctionBegin;
368   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
369   C = *B;
370 
371   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
372   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
373 
374   C->spptr = new Mat_SeqAIJViennaCL();
375   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
376   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
377   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
378 
379   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
380 
381   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
382 
383   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
384   if (C->assembled) {
385     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
386   }
387 
388   PetscFunctionReturn(0);
389 }
390 
391 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
392 {
393   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
394   PetscErrorCode ierr;
395 
396   PetscFunctionBegin;
397   ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
398   *array = a->a;
399   A->offloadmask = PETSC_OFFLOAD_CPU;
400   PetscFunctionReturn(0);
401 }
402 
403 
404 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
405 {
406   PetscFunctionBegin;
407   *array = NULL;
408   PetscFunctionReturn(0);
409 }
410 
411 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
412 {
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   A->pinnedtocpu = flg;
417   if (flg) {
418     /* make sure we have an up-to-date copy on the CPU */
419     ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
420     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
421     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);
422 
423     A->ops->mult        = MatMult_SeqAIJ;
424     A->ops->multadd     = MatMultAdd_SeqAIJ;
425     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
426     A->ops->duplicate   = MatDuplicate_SeqAIJ;
427   } else {
428     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
429     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);
430 
431     A->ops->mult        = MatMult_SeqAIJViennaCL;
432     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
433     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
434     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
435     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
436   }
437   PetscFunctionReturn(0);
438 }
439 
440 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
441 {
442   PetscErrorCode ierr;
443   Mat            B;
444   Mat_SeqAIJ     *aij;
445 
446   PetscFunctionBegin;
447 
448   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
449 
450   if (reuse == MAT_INITIAL_MATRIX) {
451     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
452   }
453 
454   B = *newmat;
455 
456   aij             = (Mat_SeqAIJ*)B->data;
457   aij->inode.use  = PETSC_FALSE;
458 
459   B->spptr        = new Mat_SeqAIJViennaCL();
460 
461   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
462   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
463   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
464 
465   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
466   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
467 
468   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
469   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
470   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
471 
472   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
473 
474   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
475 
476   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
477   if (B->assembled) {
478     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
479   }
480 
481   PetscFunctionReturn(0);
482 }
483 
484 
485 /*MC
486    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
487 
488    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
489    default. All matrix calculations are performed using the ViennaCL library.
490 
491    Options Database Keys:
492 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
493 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
494 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
495 
496   Level: beginner
497 
498 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
499 M*/
500 
501 
502 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
503 {
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
508   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
509   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
510   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513