xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision ca15aa20a3a5a8570efb974e6a39a9dd888fe266)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
25 
26 
27 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28 {
29 
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
60         } else {
61           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62 
63           // Since PetscInt is in general different from cl_uint, we have to convert:
64           viennacl::backend::mem_handle dummy;
65 
66           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67           for (PetscInt i=0; i<=A->rmap->n; ++i)
68             row_buffer.set(i, (a->i)[i]);
69 
70           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71           for (PetscInt i=0; i<a->nz; ++i)
72             col_buffer.set(i, (a->j)[i]);
73 
74           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
76         }
77         ViennaCLWaitForGPU();
78       } catch(std::exception const & ex) {
79         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
80       }
81 
82       // Create temporary vector for v += A*x:
83       if (viennaclstruct->tempvec) {
84         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
85           delete (ViennaCLVector*)viennaclstruct->tempvec;
86           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
87         } else {
88           viennaclstruct->tempvec->clear();
89         }
90       } else {
91         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
92       }
93 
94       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
95 
96       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
103 {
104   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
105   PetscInt           m               = A->rmap->n;
106   PetscErrorCode     ierr;
107 
108 
109   PetscFunctionBegin;
110   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
111     try {
112       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
113       else {
114 
115         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
116         a->nz           = Agpu->nnz();
117         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
118         A->preallocated = PETSC_TRUE;
119         if (a->singlemalloc) {
120           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
121         } else {
122           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
123           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
124           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
125         }
126         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
127         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129         a->singlemalloc = PETSC_TRUE;
130 
131         /* Setup row lengths */
132         ierr = PetscFree(a->imax);CHKERRQ(ierr);
133         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
134         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
135         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
136         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
137 
138         /* Copy data back from GPU */
139         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
140 
141         // copy row array
142         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
143         (a->i)[0] = row_buffer[0];
144         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
145           (a->i)[i+1] = row_buffer[i+1];
146           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
147         }
148 
149         // copy column indices
150         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
151         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
152         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
153           (a->j)[i] = col_buffer[i];
154 
155         // copy nonzero entries directly to destination (no conversion required)
156         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
157 
158         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
159         ViennaCLWaitForGPU();
160         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
161       }
162     } catch(std::exception const & ex) {
163       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
164     }
165 
166     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
167     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
168     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
169 
170     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
171   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
176 {
177   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
178   PetscErrorCode       ierr;
179   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
180   const ViennaCLVector *xgpu=NULL;
181   ViennaCLVector       *ygpu=NULL;
182 
183   PetscFunctionBegin;
184   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
185     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
186     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
187     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
188     try {
189       if (a->compressedrow.use) {
190         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
191       } else {
192         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
193       }
194       ViennaCLWaitForGPU();
195     } catch (std::exception const & ex) {
196       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
197     }
198     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
199     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
200     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
201     ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
202   } else {
203     ierr = VecSet(yy,0);CHKERRQ(ierr);
204   }
205   PetscFunctionReturn(0);
206 }
207 
208 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
209 {
210   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
211   PetscErrorCode       ierr;
212   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
213   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
214   ViennaCLVector       *zgpu=NULL;
215 
216   PetscFunctionBegin;
217   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
218     try {
219       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
220       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
221       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
222       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
223       if (a->compressedrow.use) {
224         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
225         *zgpu = *ygpu + temp;
226         ViennaCLWaitForGPU();
227       } else {
228         if (zz == xx || zz == yy) { //temporary required
229           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
230           *zgpu = *ygpu;
231           *zgpu += temp;
232           ViennaCLWaitForGPU();
233         } else {
234           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
235           *zgpu = *ygpu + *viennaclstruct->tempvec;
236           ViennaCLWaitForGPU();
237         }
238       }
239       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
240       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
241       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
242       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
243 
244     } catch(std::exception const & ex) {
245       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
246     }
247     ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
248   } else {
249     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
255 {
256   PetscErrorCode ierr;
257 
258   PetscFunctionBegin;
259   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
260   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
261   if (!A->pinnedtocpu) {
262     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
263   }
264   PetscFunctionReturn(0);
265 }
266 
267 /* --------------------------------------------------------------------------------*/
268 /*@
269    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
270    (the default parallel PETSc format).  This matrix will ultimately be pushed down
271    to GPUs and use the ViennaCL library for calculations. For good matrix
272    assembly performance the user should preallocate the matrix storage by setting
273    the parameter nz (or the array nnz).  By setting these parameters accurately,
274    performance during matrix assembly can be increased substantially.
275 
276 
277    Collective
278 
279    Input Parameters:
280 +  comm - MPI communicator, set to PETSC_COMM_SELF
281 .  m - number of rows
282 .  n - number of columns
283 .  nz - number of nonzeros per row (same for all rows)
284 -  nnz - array containing the number of nonzeros in the various rows
285          (possibly different for each row) or NULL
286 
287    Output Parameter:
288 .  A - the matrix
289 
290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
293 
294    Notes:
295    If nnz is given then nz is ignored
296 
297    The AIJ format (also called the Yale sparse matrix format or
298    compressed row storage), is fully compatible with standard Fortran 77
299    storage.  That is, the stored row and column indices can begin at
300    either one (as in Fortran) or zero.  See the users' manual for details.
301 
302    Specify the preallocated storage with either nz or nnz (not both).
303    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
304    allocation.  For large problems you MUST preallocate memory or you
305    will get TERRIBLE performance, see the users' manual chapter on matrices.
306 
307    Level: intermediate
308 
309 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
310 
311 @*/
312 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
313 {
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   ierr = MatCreate(comm,A);CHKERRQ(ierr);
318   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
319   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
320   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 
325 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
326 {
327   PetscErrorCode ierr;
328   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
329 
330   PetscFunctionBegin;
331   try {
332     if (viennaclcontainer) {
333       delete viennaclcontainer->tempvec;
334       delete viennaclcontainer->mat;
335       delete viennaclcontainer->compressed_mat;
336       delete viennaclcontainer;
337     }
338     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
339   } catch(std::exception const & ex) {
340     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
341   }
342 
343   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
344 
345   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
346   A->spptr = 0;
347   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 
352 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
353 {
354   PetscErrorCode ierr;
355 
356   PetscFunctionBegin;
357   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
358   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
359   PetscFunctionReturn(0);
360 }
361 
362 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
363 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
364 {
365   PetscErrorCode ierr;
366   Mat            C;
367 
368   PetscFunctionBegin;
369   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
370   C = *B;
371 
372   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
373   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
374 
375   C->spptr        = new Mat_SeqAIJViennaCL();
376   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
377   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
378   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
379 
380   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
381 
382   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
383 
384   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
385   if (C->assembled) {
386     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
387   }
388 
389   PetscFunctionReturn(0);
390 }
391 
392 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
393 {
394   PetscFunctionBegin;
395   A->pinnedtocpu = flg;
396   if (flg) {
397     A->ops->mult        = MatMult_SeqAIJ;
398     A->ops->multadd     = MatMultAdd_SeqAIJ;
399     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
400     A->ops->duplicate   = MatDuplicate_SeqAIJ;
401   } else {
402     A->ops->mult        = MatMult_SeqAIJViennaCL;
403     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
404     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
405     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
406     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
412 {
413   PetscErrorCode ierr;
414   Mat            B;
415   Mat_SeqAIJ     *aij;
416 
417   PetscFunctionBegin;
418 
419   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
420 
421   if (reuse == MAT_INITIAL_MATRIX) {
422     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
423   }
424 
425   B = *newmat;
426 
427   aij             = (Mat_SeqAIJ*)B->data;
428   aij->inode.use  = PETSC_FALSE;
429 
430   B->spptr        = new Mat_SeqAIJViennaCL();
431 
432   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
433   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
434   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
435 
436   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
437   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
438 
439   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
440   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
441   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
442 
443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
444 
445   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
446 
447   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
448   if (B->assembled) {
449     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
450   }
451 
452   PetscFunctionReturn(0);
453 }
454 
455 
456 /*MC
457    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
458 
459    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
460    default. All matrix calculations are performed using the ViennaCL library.
461 
462    Options Database Keys:
463 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
464 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
465 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
466 
467   Level: beginner
468 
469 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
470 M*/
471 
472 
473 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
474 {
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
479   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
480   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
481   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484