xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 4e4bbfaa3814cc83b5851d85be69070845f5653e)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
25 
26 
27 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28 {
29   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
30   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
31   PetscErrorCode     ierr;
32 
33   PetscFunctionBegin;
34   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
35     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
36       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
37 
38       try {
39         if (a->compressedrow.use) {
40           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
41 
42           // Since PetscInt is different from cl_uint, we have to convert:
43           viennacl::backend::mem_handle dummy;
44 
45           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
46           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
47             row_buffer.set(i, (a->compressedrow.i)[i]);
48 
49           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
50           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
51             row_indices.set(i, (a->compressedrow.rindex)[i]);
52 
53           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
54           for (PetscInt i=0; i<a->nz; ++i)
55             col_buffer.set(i, (a->j)[i]);
56 
57           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
58           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
59         } else {
60           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
61 
62           // Since PetscInt is in general different from cl_uint, we have to convert:
63           viennacl::backend::mem_handle dummy;
64 
65           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
66           for (PetscInt i=0; i<=A->rmap->n; ++i)
67             row_buffer.set(i, (a->i)[i]);
68 
69           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
70           for (PetscInt i=0; i<a->nz; ++i)
71             col_buffer.set(i, (a->j)[i]);
72 
73           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
74           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
75         }
76         ViennaCLWaitForGPU();
77       } catch(std::exception const & ex) {
78         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
79       }
80 
81       // Create temporary vector for v += A*x:
82       if (viennaclstruct->tempvec) {
83         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
84           delete (ViennaCLVector*)viennaclstruct->tempvec;
85           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
86         } else {
87           viennaclstruct->tempvec->clear();
88         }
89       } else {
90         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
91       }
92 
93       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
94 
95       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
96     }
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
102 {
103   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
104   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
105   PetscInt           m  = A->rmap->n;
106   PetscErrorCode     ierr;
107 
108   PetscFunctionBegin;
109   if (A->valid_GPU_matrix == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
110   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
111     try {
112       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
113       else {
114 
115         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
116         a->nz           = Agpu->nnz();
117         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
118         A->preallocated = PETSC_TRUE;
119         if (a->singlemalloc) {
120           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
121         } else {
122           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
123           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
124           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
125         }
126         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
127         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129         a->singlemalloc = PETSC_TRUE;
130 
131         /* Setup row lengths */
132         ierr = PetscFree(a->imax);CHKERRQ(ierr);
133         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
134         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
135         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
136         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
137 
138         /* Copy data back from GPU */
139         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
140 
141         // copy row array
142         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
143         (a->i)[0] = row_buffer[0];
144         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
145           (a->i)[i+1] = row_buffer[i+1];
146           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
147         }
148 
149         // copy column indices
150         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
151         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
152         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
153           (a->j)[i] = col_buffer[i];
154 
155         // copy nonzero entries directly to destination (no conversion required)
156         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
157 
158         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
159         ViennaCLWaitForGPU();
160         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
161       }
162     } catch(std::exception const & ex) {
163       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
164     }
165   } else if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
166     PetscFunctionReturn(0);
167   } else {
168     if (!Agpu && A->valid_GPU_matrix != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
169 
170     if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
171     if (!Agpu) {
172       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
173     } else {
174       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
175     }
176   }
177   A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
178   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
179   ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
180   ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
185 {
186   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
187   PetscErrorCode       ierr;
188   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
189   const ViennaCLVector *xgpu=NULL;
190   ViennaCLVector       *ygpu=NULL;
191 
192   PetscFunctionBegin;
193   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
194     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
195     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
196     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
197     try {
198       if (a->compressedrow.use) {
199         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
200       } else {
201         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
202       }
203       ViennaCLWaitForGPU();
204     } catch (std::exception const & ex) {
205       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
206     }
207     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
208     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
209     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
210     ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
211   } else {
212     ierr = VecSet(yy,0);CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
218 {
219   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
220   PetscErrorCode       ierr;
221   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
222   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
223   ViennaCLVector       *zgpu=NULL;
224 
225   PetscFunctionBegin;
226   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
227     try {
228       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
229       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
230       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
231       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
232       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
233       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
234       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
235       else *zgpu += *viennaclstruct->tempvec;
236       ViennaCLWaitForGPU();
237       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
238       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
239       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
240       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
241 
242     } catch(std::exception const & ex) {
243       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
244     }
245     ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
246   } else {
247     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
248   }
249   PetscFunctionReturn(0);
250 }
251 
252 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
253 {
254   PetscErrorCode ierr;
255 
256   PetscFunctionBegin;
257   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
258   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
259   if (!A->pinnedtocpu) {
260     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /* --------------------------------------------------------------------------------*/
266 /*@
267    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
268    (the default parallel PETSc format).  This matrix will ultimately be pushed down
269    to GPUs and use the ViennaCL library for calculations. For good matrix
270    assembly performance the user should preallocate the matrix storage by setting
271    the parameter nz (or the array nnz).  By setting these parameters accurately,
272    performance during matrix assembly can be increased substantially.
273 
274 
275    Collective
276 
277    Input Parameters:
278 +  comm - MPI communicator, set to PETSC_COMM_SELF
279 .  m - number of rows
280 .  n - number of columns
281 .  nz - number of nonzeros per row (same for all rows)
282 -  nnz - array containing the number of nonzeros in the various rows
283          (possibly different for each row) or NULL
284 
285    Output Parameter:
286 .  A - the matrix
287 
288    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
289    MatXXXXSetPreallocation() paradigm instead of this routine directly.
290    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
291 
292    Notes:
293    If nnz is given then nz is ignored
294 
295    The AIJ format (also called the Yale sparse matrix format or
296    compressed row storage), is fully compatible with standard Fortran 77
297    storage.  That is, the stored row and column indices can begin at
298    either one (as in Fortran) or zero.  See the users' manual for details.
299 
300    Specify the preallocated storage with either nz or nnz (not both).
301    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
302    allocation.  For large problems you MUST preallocate memory or you
303    will get TERRIBLE performance, see the users' manual chapter on matrices.
304 
305    Level: intermediate
306 
307 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
308 
309 @*/
310 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
311 {
312   PetscErrorCode ierr;
313 
314   PetscFunctionBegin;
315   ierr = MatCreate(comm,A);CHKERRQ(ierr);
316   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
317   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
318   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 
323 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
324 {
325   PetscErrorCode ierr;
326   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
327 
328   PetscFunctionBegin;
329   try {
330     if (viennaclcontainer) {
331       delete viennaclcontainer->tempvec;
332       delete viennaclcontainer->mat;
333       delete viennaclcontainer->compressed_mat;
334       delete viennaclcontainer;
335     }
336     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
337   } catch(std::exception const & ex) {
338     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
339   }
340 
341   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
342 
343   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
344   A->spptr = 0;
345   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 
350 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
351 {
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
356   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
357   PetscFunctionReturn(0);
358 }
359 
360 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
361 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
362 {
363   PetscErrorCode ierr;
364   Mat            C;
365 
366   PetscFunctionBegin;
367   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
368   C = *B;
369 
370   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
371   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
372 
373   C->spptr = new Mat_SeqAIJViennaCL();
374   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
375   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
376   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
377 
378   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
379 
380   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
381 
382   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
383   if (C->assembled) {
384     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
385   }
386 
387   PetscFunctionReturn(0);
388 }
389 
390 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
391 {
392   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
393   PetscErrorCode ierr;
394 
395   PetscFunctionBegin;
396   ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
397   *array = a->a;
398   A->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
399   PetscFunctionReturn(0);
400 }
401 
402 
403 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
404 {
405   PetscFunctionBegin;
406   *array = NULL;
407   PetscFunctionReturn(0);
408 }
409 
410 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
411 {
412   PetscErrorCode ierr;
413 
414   PetscFunctionBegin;
415   A->pinnedtocpu = flg;
416   if (flg) {
417     /* make sure we have an up-to-date copy on the CPU */
418     ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
419     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
420     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);
421 
422     A->ops->mult        = MatMult_SeqAIJ;
423     A->ops->multadd     = MatMultAdd_SeqAIJ;
424     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
425     A->ops->duplicate   = MatDuplicate_SeqAIJ;
426   } else {
427     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
428     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);
429 
430     A->ops->mult        = MatMult_SeqAIJViennaCL;
431     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
432     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
433     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
434     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
435   }
436   PetscFunctionReturn(0);
437 }
438 
439 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
440 {
441   PetscErrorCode ierr;
442   Mat            B;
443   Mat_SeqAIJ     *aij;
444 
445   PetscFunctionBegin;
446 
447   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
448 
449   if (reuse == MAT_INITIAL_MATRIX) {
450     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
451   }
452 
453   B = *newmat;
454 
455   aij             = (Mat_SeqAIJ*)B->data;
456   aij->inode.use  = PETSC_FALSE;
457 
458   B->spptr        = new Mat_SeqAIJViennaCL();
459 
460   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
461   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
462   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
463 
464   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
465   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
466 
467   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
468   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
469   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
470 
471   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
472 
473   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
474 
475   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
476   if (B->assembled) {
477     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
478   }
479 
480   PetscFunctionReturn(0);
481 }
482 
483 
484 /*MC
485    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
486 
487    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
488    default. All matrix calculations are performed using the ViennaCL library.
489 
490    Options Database Keys:
491 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
492 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
493 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
494 
495   Level: beginner
496 
497 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
498 M*/
499 
500 
501 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
502 {
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
507   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
508   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
509   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512