xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 4863603af06b16768e4e4a060cedf7d42fa55e81)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
25 
26 
27 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28 {
29 
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
60         } else {
61           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62 
63           // Since PetscInt is in general different from cl_uint, we have to convert:
64           viennacl::backend::mem_handle dummy;
65 
66           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67           for (PetscInt i=0; i<=A->rmap->n; ++i)
68             row_buffer.set(i, (a->i)[i]);
69 
70           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71           for (PetscInt i=0; i<a->nz; ++i)
72             col_buffer.set(i, (a->j)[i]);
73 
74           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
76         }
77         ViennaCLWaitForGPU();
78       } catch(std::exception const & ex) {
79         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
80       }
81 
82       // Create temporary vector for v += A*x:
83       if (viennaclstruct->tempvec) {
84         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
85           delete (ViennaCLVector*)viennaclstruct->tempvec;
86           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
87         } else {
88           viennaclstruct->tempvec->clear();
89         }
90       } else {
91         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
92       }
93 
94       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
95 
96       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
103 {
104   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
105   PetscInt           m               = A->rmap->n;
106   PetscErrorCode     ierr;
107 
108 
109   PetscFunctionBegin;
110   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
111     try {
112       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
113       else {
114 
115         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
116         a->nz           = Agpu->nnz();
117         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
118         A->preallocated = PETSC_TRUE;
119         if (a->singlemalloc) {
120           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
121         } else {
122           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
123           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
124           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
125         }
126         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
127         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129         a->singlemalloc = PETSC_TRUE;
130 
131         /* Setup row lengths */
132         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
133         ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr);
134         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
135 
136         /* Copy data back from GPU */
137         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
138 
139         // copy row array
140         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
141         (a->i)[0] = row_buffer[0];
142         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
143           (a->i)[i+1] = row_buffer[i+1];
144           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
145         }
146 
147         // copy column indices
148         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
149         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
150         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
151           (a->j)[i] = col_buffer[i];
152 
153         // copy nonzero entries directly to destination (no conversion required)
154         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
155 
156         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
157         ViennaCLWaitForGPU();
158         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
159       }
160     } catch(std::exception const & ex) {
161       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
162     }
163 
164     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
165     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
167 
168     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
169   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
170   PetscFunctionReturn(0);
171 }
172 
173 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
174 {
175   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
176   PetscErrorCode       ierr;
177   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
178   const ViennaCLVector *xgpu=NULL;
179   ViennaCLVector       *ygpu=NULL;
180 
181   PetscFunctionBegin;
182   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
183     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
184     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
185     try {
186       if (a->compressedrow.use) {
187         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
188       } else {
189         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
190       }
191       ViennaCLWaitForGPU();
192     } catch (std::exception const & ex) {
193       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
194     }
195     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
196     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
197     ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
198   } else {
199     ierr = VecSet(yy,0);CHKERRQ(ierr);
200   }
201   PetscFunctionReturn(0);
202 }
203 
204 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
205 {
206   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
207   PetscErrorCode       ierr;
208   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
209   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
210   ViennaCLVector       *zgpu=NULL;
211 
212   PetscFunctionBegin;
213   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
214     try {
215       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
216       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
217       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
218 
219       if (a->compressedrow.use) {
220         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
221         *zgpu = *ygpu + temp;
222         ViennaCLWaitForGPU();
223       } else {
224         if (zz == xx || zz == yy) { //temporary required
225           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
226           *zgpu = *ygpu;
227           *zgpu += temp;
228           ViennaCLWaitForGPU();
229         } else {
230           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
231           *zgpu = *ygpu + *viennaclstruct->tempvec;
232           ViennaCLWaitForGPU();
233         }
234       }
235 
236       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
237       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
238       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
239 
240     } catch(std::exception const & ex) {
241       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
242     }
243     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
244   } else {
245     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
246   }
247   PetscFunctionReturn(0);
248 }
249 
250 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
251 {
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
256   if (!A->pinnedtocpu) {
257     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /* --------------------------------------------------------------------------------*/
263 /*@
264    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
265    (the default parallel PETSc format).  This matrix will ultimately be pushed down
266    to GPUs and use the ViennaCL library for calculations. For good matrix
267    assembly performance the user should preallocate the matrix storage by setting
268    the parameter nz (or the array nnz).  By setting these parameters accurately,
269    performance during matrix assembly can be increased substantially.
270 
271 
272    Collective
273 
274    Input Parameters:
275 +  comm - MPI communicator, set to PETSC_COMM_SELF
276 .  m - number of rows
277 .  n - number of columns
278 .  nz - number of nonzeros per row (same for all rows)
279 -  nnz - array containing the number of nonzeros in the various rows
280          (possibly different for each row) or NULL
281 
282    Output Parameter:
283 .  A - the matrix
284 
285    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
286    MatXXXXSetPreallocation() paradigm instead of this routine directly.
287    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
288 
289    Notes:
290    If nnz is given then nz is ignored
291 
292    The AIJ format (also called the Yale sparse matrix format or
293    compressed row storage), is fully compatible with standard Fortran 77
294    storage.  That is, the stored row and column indices can begin at
295    either one (as in Fortran) or zero.  See the users' manual for details.
296 
297    Specify the preallocated storage with either nz or nnz (not both).
298    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
299    allocation.  For large problems you MUST preallocate memory or you
300    will get TERRIBLE performance, see the users' manual chapter on matrices.
301 
302    Level: intermediate
303 
304 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
305 
306 @*/
307 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
308 {
309   PetscErrorCode ierr;
310 
311   PetscFunctionBegin;
312   ierr = MatCreate(comm,A);CHKERRQ(ierr);
313   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
314   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
315   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
316   PetscFunctionReturn(0);
317 }
318 
319 
320 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
321 {
322   PetscErrorCode ierr;
323   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
324 
325   PetscFunctionBegin;
326   try {
327     if (viennaclcontainer) {
328       delete viennaclcontainer->tempvec;
329       delete viennaclcontainer->mat;
330       delete viennaclcontainer->compressed_mat;
331       delete viennaclcontainer;
332     }
333     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
334   } catch(std::exception const & ex) {
335     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
336   }
337 
338   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
339 
340   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
341   A->spptr = 0;
342   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 
347 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
348 {
349   PetscErrorCode ierr;
350 
351   PetscFunctionBegin;
352   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
353   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
354   PetscFunctionReturn(0);
355 }
356 
357 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
358 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
359 {
360   PetscErrorCode ierr;
361   Mat            C;
362 
363   PetscFunctionBegin;
364   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
365   C = *B;
366 
367   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
368   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
369 
370   C->spptr        = new Mat_SeqAIJViennaCL();
371   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
372   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
373   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
374 
375   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
376 
377   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
378 
379   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
380   if (C->assembled) {
381     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
382   }
383 
384   PetscFunctionReturn(0);
385 }
386 
387 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
388 {
389   PetscFunctionBegin;
390   A->pinnedtocpu = flg;
391   if (flg) {
392     A->ops->mult           = MatMult_SeqAIJ;
393     A->ops->multadd        = MatMultAdd_SeqAIJ;
394     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJ;
395     A->ops->destroy        = MatDestroy_SeqAIJ;
396     A->ops->duplicate      = MatDuplicate_SeqAIJ;
397   } else {
398     A->ops->mult           = MatMult_SeqAIJViennaCL;
399     A->ops->multadd        = MatMultAdd_SeqAIJViennaCL;
400     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
401     A->ops->destroy        = MatDestroy_SeqAIJViennaCL;
402     A->ops->duplicate      = MatDuplicate_SeqAIJViennaCL;
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
408 {
409   PetscErrorCode ierr;
410   Mat            B;
411   Mat_SeqAIJ     *aij;
412 
413   PetscFunctionBegin;
414 
415   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
416 
417   if (reuse == MAT_INITIAL_MATRIX) {
418     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
419   }
420 
421   B = *newmat;
422 
423   aij             = (Mat_SeqAIJ*)B->data;
424   aij->inode.use  = PETSC_FALSE;
425 
426   B->spptr        = new Mat_SeqAIJViennaCL();
427 
428   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
429   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
430   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
431 
432   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
433   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
434 
435   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
436   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
437   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
438 
439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
440 
441   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
442 
443   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
444   if (B->assembled) {
445     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
446   }
447 
448   PetscFunctionReturn(0);
449 }
450 
451 
452 /*MC
453    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
454 
455    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
456    default. All matrix calculations are performed using the ViennaCL library.
457 
458    Options Database Keys:
459 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
460 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
461 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
462 
463   Level: beginner
464 
465 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
466 M*/
467 
468 
469 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
470 {
471   PetscErrorCode ierr;
472 
473   PetscFunctionBegin;
474   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
475   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
476   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
477   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480