xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision e38f01bd562bb9c093f0c5a16b3a2cf03e50c0e1)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10 #include <petscbt.h>
11 #include <../src/vec/vec/impls/dvecimpl.h>
12 #include <petsc/private/vecimpl.h>
13 
14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
24 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
25 
26 
27 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28 {
29 
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
60         } else {
61           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62 
63           // Since PetscInt is in general different from cl_uint, we have to convert:
64           viennacl::backend::mem_handle dummy;
65 
66           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67           for (PetscInt i=0; i<=A->rmap->n; ++i)
68             row_buffer.set(i, (a->i)[i]);
69 
70           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71           for (PetscInt i=0; i<a->nz; ++i)
72             col_buffer.set(i, (a->j)[i]);
73 
74           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
76         }
77         ViennaCLWaitForGPU();
78       } catch(std::exception const & ex) {
79         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
80       }
81 
82       // Create temporary vector for v += A*x:
83       if (viennaclstruct->tempvec) {
84         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
85           delete (ViennaCLVector*)viennaclstruct->tempvec;
86           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
87         } else {
88           viennaclstruct->tempvec->clear();
89         }
90       } else {
91         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
92       }
93 
94       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
95 
96       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
103 {
104   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
105   PetscInt           m               = A->rmap->n;
106   PetscErrorCode     ierr;
107 
108 
109   PetscFunctionBegin;
110   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
111     try {
112       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
113       else {
114 
115         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
116         a->nz           = Agpu->nnz();
117         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
118         A->preallocated = PETSC_TRUE;
119         if (a->singlemalloc) {
120           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
121         } else {
122           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
123           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
124           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
125         }
126         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
127         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129         a->singlemalloc = PETSC_TRUE;
130 
131         /* Setup row lengths */
132         ierr = PetscFree(a->imax);CHKERRQ(ierr);
133         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
134         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
135         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
136         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
137 
138         /* Copy data back from GPU */
139         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
140 
141         // copy row array
142         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
143         (a->i)[0] = row_buffer[0];
144         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
145           (a->i)[i+1] = row_buffer[i+1];
146           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
147         }
148 
149         // copy column indices
150         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
151         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
152         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
153           (a->j)[i] = col_buffer[i];
154 
155         // copy nonzero entries directly to destination (no conversion required)
156         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
157 
158         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
159         ViennaCLWaitForGPU();
160         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
161       }
162     } catch(std::exception const & ex) {
163       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
164     }
165 
166     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
167     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
168     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
169 
170     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
171   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
172   PetscFunctionReturn(0);
173 }
174 
175 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
176 {
177   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
178   PetscErrorCode       ierr;
179   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
180   const ViennaCLVector *xgpu=NULL;
181   ViennaCLVector       *ygpu=NULL;
182 
183   PetscFunctionBegin;
184   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
185     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
186     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
187     try {
188       if (a->compressedrow.use) {
189         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
190       } else {
191         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
192       }
193       ViennaCLWaitForGPU();
194     } catch (std::exception const & ex) {
195       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
196     }
197     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
198     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
199     ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
200   } else {
201     ierr = VecSet(yy,0);CHKERRQ(ierr);
202   }
203   PetscFunctionReturn(0);
204 }
205 
206 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
207 {
208   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
209   PetscErrorCode       ierr;
210   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
211   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
212   ViennaCLVector       *zgpu=NULL;
213 
214   PetscFunctionBegin;
215   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
216     try {
217       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
218       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
219       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
220 
221       if (a->compressedrow.use) {
222         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
223         *zgpu = *ygpu + temp;
224         ViennaCLWaitForGPU();
225       } else {
226         if (zz == xx || zz == yy) { //temporary required
227           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
228           *zgpu = *ygpu;
229           *zgpu += temp;
230           ViennaCLWaitForGPU();
231         } else {
232           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
233           *zgpu = *ygpu + *viennaclstruct->tempvec;
234           ViennaCLWaitForGPU();
235         }
236       }
237 
238       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
239       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
240       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
241 
242     } catch(std::exception const & ex) {
243       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
244     }
245     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
246   } else {
247     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
248   }
249   PetscFunctionReturn(0);
250 }
251 
252 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
253 {
254   PetscErrorCode ierr;
255 
256   PetscFunctionBegin;
257   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
258   if (!A->pinnedtocpu) {
259     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 /* --------------------------------------------------------------------------------*/
265 /*@
266    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
267    (the default parallel PETSc format).  This matrix will ultimately be pushed down
268    to GPUs and use the ViennaCL library for calculations. For good matrix
269    assembly performance the user should preallocate the matrix storage by setting
270    the parameter nz (or the array nnz).  By setting these parameters accurately,
271    performance during matrix assembly can be increased substantially.
272 
273 
274    Collective
275 
276    Input Parameters:
277 +  comm - MPI communicator, set to PETSC_COMM_SELF
278 .  m - number of rows
279 .  n - number of columns
280 .  nz - number of nonzeros per row (same for all rows)
281 -  nnz - array containing the number of nonzeros in the various rows
282          (possibly different for each row) or NULL
283 
284    Output Parameter:
285 .  A - the matrix
286 
287    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
288    MatXXXXSetPreallocation() paradigm instead of this routine directly.
289    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
290 
291    Notes:
292    If nnz is given then nz is ignored
293 
294    The AIJ format (also called the Yale sparse matrix format or
295    compressed row storage), is fully compatible with standard Fortran 77
296    storage.  That is, the stored row and column indices can begin at
297    either one (as in Fortran) or zero.  See the users' manual for details.
298 
299    Specify the preallocated storage with either nz or nnz (not both).
300    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
301    allocation.  For large problems you MUST preallocate memory or you
302    will get TERRIBLE performance, see the users' manual chapter on matrices.
303 
304    Level: intermediate
305 
306 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
307 
308 @*/
309 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
310 {
311   PetscErrorCode ierr;
312 
313   PetscFunctionBegin;
314   ierr = MatCreate(comm,A);CHKERRQ(ierr);
315   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
316   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
317   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 
322 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
323 {
324   PetscErrorCode ierr;
325   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
326 
327   PetscFunctionBegin;
328   try {
329     if (viennaclcontainer) {
330       delete viennaclcontainer->tempvec;
331       delete viennaclcontainer->mat;
332       delete viennaclcontainer->compressed_mat;
333       delete viennaclcontainer;
334     }
335     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
336   } catch(std::exception const & ex) {
337     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
338   }
339 
340   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
341 
342   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
343   A->spptr = 0;
344   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 
349 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
350 {
351   PetscErrorCode ierr;
352 
353   PetscFunctionBegin;
354   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
355   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
356   PetscFunctionReturn(0);
357 }
358 
359 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
360 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
361 {
362   PetscErrorCode ierr;
363   Mat            C;
364 
365   PetscFunctionBegin;
366   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
367   C = *B;
368 
369   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
370   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
371 
372   C->spptr        = new Mat_SeqAIJViennaCL();
373   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
374   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
375   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
376 
377   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
378 
379   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
380 
381   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
382   if (C->assembled) {
383     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
384   }
385 
386   PetscFunctionReturn(0);
387 }
388 
389 static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
390 {
391   PetscFunctionBegin;
392   A->pinnedtocpu = flg;
393   if (flg) {
394     A->ops->mult           = MatMult_SeqAIJ;
395     A->ops->multadd        = MatMultAdd_SeqAIJ;
396     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJ;
397     A->ops->duplicate      = MatDuplicate_SeqAIJ;
398   } else {
399     A->ops->mult           = MatMult_SeqAIJViennaCL;
400     A->ops->multadd        = MatMultAdd_SeqAIJViennaCL;
401     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
402     A->ops->destroy        = MatDestroy_SeqAIJViennaCL;
403     A->ops->duplicate      = MatDuplicate_SeqAIJViennaCL;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
409 {
410   PetscErrorCode ierr;
411   Mat            B;
412   Mat_SeqAIJ     *aij;
413 
414   PetscFunctionBegin;
415 
416   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
417 
418   if (reuse == MAT_INITIAL_MATRIX) {
419     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
420   }
421 
422   B = *newmat;
423 
424   aij             = (Mat_SeqAIJ*)B->data;
425   aij->inode.use  = PETSC_FALSE;
426 
427   B->spptr        = new Mat_SeqAIJViennaCL();
428 
429   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
430   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
431   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
432 
433   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
434   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
435 
436   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
437   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
438   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
439 
440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
441 
442   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
443 
444   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
445   if (B->assembled) {
446     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
447   }
448 
449   PetscFunctionReturn(0);
450 }
451 
452 
453 /*MC
454    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
455 
456    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
457    default. All matrix calculations are performed using the ViennaCL library.
458 
459    Options Database Keys:
460 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
461 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
462 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
463 
464   Level: beginner
465 
466 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
467 M*/
468 
469 
470 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
471 {
472   PetscErrorCode ierr;
473 
474   PetscFunctionBegin;
475   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
476   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
477   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
478   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481