xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 5a856986583887c326abe5dfd149e8184a29cd80)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
25 
26 
27 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28 {
29 
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59         } else {
60           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
61 
62           // Since PetscInt is in general different from cl_uint, we have to convert:
63           viennacl::backend::mem_handle dummy;
64 
65           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
66           for (PetscInt i=0; i<=A->rmap->n; ++i)
67             row_buffer.set(i, (a->i)[i]);
68 
69           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
70           for (PetscInt i=0; i<a->nz; ++i)
71             col_buffer.set(i, (a->j)[i]);
72 
73           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
74         }
75         ViennaCLWaitForGPU();
76       } catch(std::exception const & ex) {
77         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
78       }
79 
80       // Create temporary vector for v += A*x:
81       if (viennaclstruct->tempvec) {
82         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
83           delete (ViennaCLVector*)viennaclstruct->tempvec;
84           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
85         } else {
86           viennaclstruct->tempvec->clear();
87         }
88       } else {
89         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
90       }
91 
92       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
93 
94       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
95     }
96   }
97   PetscFunctionReturn(0);
98 }
99 
100 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
101 {
102   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
103   PetscInt           m               = A->rmap->n;
104   PetscErrorCode     ierr;
105 
106 
107   PetscFunctionBegin;
108   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
109     try {
110       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
111       else {
112 
113         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
114         a->nz           = Agpu->nnz();
115         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
116         A->preallocated = PETSC_TRUE;
117         if (a->singlemalloc) {
118           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
119         } else {
120           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
121           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
122           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
123         }
124         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
125         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
126 
127         a->singlemalloc = PETSC_TRUE;
128 
129         /* Setup row lengths */
130         ierr = PetscFree(a->imax);CHKERRQ(ierr);
131         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
132         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
133         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
134         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
135 
136         /* Copy data back from GPU */
137         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
138 
139         // copy row array
140         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
141         (a->i)[0] = row_buffer[0];
142         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
143           (a->i)[i+1] = row_buffer[i+1];
144           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
145         }
146 
147         // copy column indices
148         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
149         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
150         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
151           (a->j)[i] = col_buffer[i];
152 
153         // copy nonzero entries directly to destination (no conversion required)
154         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
155 
156         ViennaCLWaitForGPU();
157         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
158       }
159     } catch(std::exception const & ex) {
160       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
161     }
162 
163     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
164     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166 
167     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
168   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
169   PetscFunctionReturn(0);
170 }
171 
172 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
173 {
174   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
175   PetscErrorCode       ierr;
176   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
177   const ViennaCLVector *xgpu=NULL;
178   ViennaCLVector       *ygpu=NULL;
179 
180   PetscFunctionBegin;
181   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
182     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
183     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
184     try {
185       if (a->compressedrow.use) {
186         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
187       } else {
188         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
189       }
190       ViennaCLWaitForGPU();
191     } catch (std::exception const & ex) {
192       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
193     }
194     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
195     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
196     ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
197   } else {
198     ierr = VecSet(yy,0);CHKERRQ(ierr);
199   }
200   PetscFunctionReturn(0);
201 }
202 
203 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
204 {
205   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
206   PetscErrorCode       ierr;
207   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
208   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
209   ViennaCLVector       *zgpu=NULL;
210 
211   PetscFunctionBegin;
212   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
213     try {
214       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
215       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
216       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
217 
218       if (a->compressedrow.use) {
219         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
220         *zgpu = *ygpu + temp;
221         ViennaCLWaitForGPU();
222       } else {
223         if (zz == xx || zz == yy) { //temporary required
224           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
225           *zgpu = *ygpu;
226           *zgpu += temp;
227           ViennaCLWaitForGPU();
228         } else {
229           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
230           *zgpu = *ygpu + *viennaclstruct->tempvec;
231           ViennaCLWaitForGPU();
232         }
233       }
234 
235       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
236       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
237       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
238 
239     } catch(std::exception const & ex) {
240       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
241     }
242     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
243   } else {
244     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
245   }
246   PetscFunctionReturn(0);
247 }
248 
249 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
250 {
251   PetscErrorCode ierr;
252 
253   PetscFunctionBegin;
254   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
255   if (!A->pinnedtocpu) {
256     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /* --------------------------------------------------------------------------------*/
262 /*@
263    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
264    (the default parallel PETSc format).  This matrix will ultimately be pushed down
265    to GPUs and use the ViennaCL library for calculations. For good matrix
266    assembly performance the user should preallocate the matrix storage by setting
267    the parameter nz (or the array nnz).  By setting these parameters accurately,
268    performance during matrix assembly can be increased substantially.
269 
270 
271    Collective
272 
273    Input Parameters:
274 +  comm - MPI communicator, set to PETSC_COMM_SELF
275 .  m - number of rows
276 .  n - number of columns
277 .  nz - number of nonzeros per row (same for all rows)
278 -  nnz - array containing the number of nonzeros in the various rows
279          (possibly different for each row) or NULL
280 
281    Output Parameter:
282 .  A - the matrix
283 
284    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
285    MatXXXXSetPreallocation() paradigm instead of this routine directly.
286    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
287 
288    Notes:
289    If nnz is given then nz is ignored
290 
291    The AIJ format (also called the Yale sparse matrix format or
292    compressed row storage), is fully compatible with standard Fortran 77
293    storage.  That is, the stored row and column indices can begin at
294    either one (as in Fortran) or zero.  See the users' manual for details.
295 
296    Specify the preallocated storage with either nz or nnz (not both).
297    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
298    allocation.  For large problems you MUST preallocate memory or you
299    will get TERRIBLE performance, see the users' manual chapter on matrices.
300 
301    Level: intermediate
302 
303 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
304 
305 @*/
306 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
307 {
308   PetscErrorCode ierr;
309 
310   PetscFunctionBegin;
311   ierr = MatCreate(comm,A);CHKERRQ(ierr);
312   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
313   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
314   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 
319 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
320 {
321   PetscErrorCode ierr;
322   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
323 
324   PetscFunctionBegin;
325   try {
326     if (viennaclcontainer) {
327       delete viennaclcontainer->tempvec;
328       delete viennaclcontainer->mat;
329       delete viennaclcontainer->compressed_mat;
330       delete viennaclcontainer;
331     }
332     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
333   } catch(std::exception const & ex) {
334     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
335   }
336 
337   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
338 
339   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
340   A->spptr = 0;
341   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
347 {
348   PetscErrorCode ierr;
349 
350   PetscFunctionBegin;
351   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
352   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
353   PetscFunctionReturn(0);
354 }
355 
356 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
357 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
358 {
359   PetscErrorCode ierr;
360   Mat            C;
361 
362   PetscFunctionBegin;
363   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
364   C = *B;
365 
366   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
367   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
368 
369   C->spptr        = new Mat_SeqAIJViennaCL();
370   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
371   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
372   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
373 
374   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
375 
376   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
377 
378   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
379   if (C->assembled) {
380     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
381   }
382 
383   PetscFunctionReturn(0);
384 }
385 
386 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
387 {
388   PetscFunctionBegin;
389   A->pinnedtocpu = flg;
390   if (flg) {
391     A->ops->mult           = MatMult_SeqAIJ;
392     A->ops->multadd        = MatMultAdd_SeqAIJ;
393     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJ;
394     A->ops->duplicate      = MatDuplicate_SeqAIJ;
395   } else {
396     A->ops->mult           = MatMult_SeqAIJViennaCL;
397     A->ops->multadd        = MatMultAdd_SeqAIJViennaCL;
398     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
399     A->ops->destroy        = MatDestroy_SeqAIJViennaCL;
400     A->ops->duplicate      = MatDuplicate_SeqAIJViennaCL;
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
406 {
407   PetscErrorCode ierr;
408   Mat            B;
409   Mat_SeqAIJ     *aij;
410 
411   PetscFunctionBegin;
412 
413   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
414 
415   if (reuse == MAT_INITIAL_MATRIX) {
416     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
417   }
418 
419   B = *newmat;
420 
421   aij             = (Mat_SeqAIJ*)B->data;
422   aij->inode.use  = PETSC_FALSE;
423 
424   B->spptr        = new Mat_SeqAIJViennaCL();
425 
426   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
427   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
428   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
429 
430   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
431   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
432 
433   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
434   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
435   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
436 
437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
438 
439   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
440 
441   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
442   if (B->assembled) {
443     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
444   }
445 
446   PetscFunctionReturn(0);
447 }
448 
449 
450 /*MC
451    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
452 
453    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
454    default. All matrix calculations are performed using the ViennaCL library.
455 
456    Options Database Keys:
457 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
458 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
459 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
460 
461   Level: beginner
462 
463 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
464 M*/
465 
466 
467 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
468 {
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
473   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
474   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
475   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478