xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision a5bc1bf344cccdca9c53fc2bcd040c7b504fff0e)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include <petscconf.h>
9 #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
10 #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
11 #include <petscbt.h>
12 #include <../src/vec/vec/impls/dvecimpl.h>
13 #include <petsc/private/vecimpl.h>
14 
15 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
16 
17 
18 #include <algorithm>
19 #include <vector>
20 #include <string>
21 
22 #include "viennacl/linalg/prod.hpp"
23 
24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
25 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
26 
27 
28 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
29 {
30   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32   PetscErrorCode     ierr;
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         if (a->compressedrow.use) {
41           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42 
43           // Since PetscInt is different from cl_uint, we have to convert:
44           viennacl::backend::mem_handle dummy;
45 
46           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48             row_buffer.set(i, (a->compressedrow.i)[i]);
49 
50           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52             row_indices.set(i, (a->compressedrow.rindex)[i]);
53 
54           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55           for (PetscInt i=0; i<a->nz; ++i)
56             col_buffer.set(i, (a->j)[i]);
57 
58           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59           ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
60         } else {
61           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62 
63           // Since PetscInt is in general different from cl_uint, we have to convert:
64           viennacl::backend::mem_handle dummy;
65 
66           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67           for (PetscInt i=0; i<=A->rmap->n; ++i)
68             row_buffer.set(i, (a->i)[i]);
69 
70           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71           for (PetscInt i=0; i<a->nz; ++i)
72             col_buffer.set(i, (a->j)[i]);
73 
74           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75           ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
76         }
77         ViennaCLWaitForGPU();
78       } catch(std::exception const & ex) {
79         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
80       }
81 
82       // Create temporary vector for v += A*x:
83       if (viennaclstruct->tempvec) {
84         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
85           delete (ViennaCLVector*)viennaclstruct->tempvec;
86           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
87         } else {
88           viennaclstruct->tempvec->clear();
89         }
90       } else {
91         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
92       }
93 
94       A->offloadmask = PETSC_OFFLOAD_BOTH;
95 
96       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
103 {
104   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
105   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
106   PetscInt           m  = A->rmap->n;
107   PetscErrorCode     ierr;
108 
109   PetscFunctionBegin;
110   if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
111   if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
112     try {
113       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
114       else {
115 
116         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
117         a->nz           = Agpu->nnz();
118         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
119         A->preallocated = PETSC_TRUE;
120         if (a->singlemalloc) {
121           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
122         } else {
123           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
124           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
125           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
126         }
127         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
128         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
129 
130         a->singlemalloc = PETSC_TRUE;
131 
132         /* Setup row lengths */
133         ierr = PetscFree(a->imax);CHKERRQ(ierr);
134         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
135         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
136         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
137         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
138 
139         /* Copy data back from GPU */
140         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
141 
142         // copy row array
143         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
144         (a->i)[0] = row_buffer[0];
145         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
146           (a->i)[i+1] = row_buffer[i+1];
147           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
148         }
149 
150         // copy column indices
151         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
152         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
153         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
154           (a->j)[i] = col_buffer[i];
155 
156         // copy nonzero entries directly to destination (no conversion required)
157         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
158 
159         ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
160         ViennaCLWaitForGPU();
161         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
162       }
163     } catch(std::exception const & ex) {
164       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
165     }
166   } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
167     PetscFunctionReturn(0);
168   } else {
169     if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);
170 
171     if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
172     if (!Agpu) {
173       viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
174     } else {
175       viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
176     }
177   }
178   A->offloadmask = PETSC_OFFLOAD_BOTH;
179   /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
180   ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
181   ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
186 {
187   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
188   PetscErrorCode       ierr;
189   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
190   const ViennaCLVector *xgpu=NULL;
191   ViennaCLVector       *ygpu=NULL;
192 
193   PetscFunctionBegin;
194   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
195   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
196   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
197     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
198     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
199     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
200     try {
201       if (a->compressedrow.use) {
202         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
203       } else {
204         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
205       }
206       ViennaCLWaitForGPU();
207     } catch (std::exception const & ex) {
208       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
209     }
210     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
211     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
212     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
213     ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
214   } else {
215     ierr = VecSet(yy,0);CHKERRQ(ierr);
216   }
217   PetscFunctionReturn(0);
218 }
219 
220 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
221 {
222   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
223   PetscErrorCode       ierr;
224   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
225   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
226   ViennaCLVector       *zgpu=NULL;
227 
228   PetscFunctionBegin;
229   /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
230   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
231   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
232     try {
233       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
234       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
235       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
236       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
237       if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
238       else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
239       if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
240       else *zgpu += *viennaclstruct->tempvec;
241       ViennaCLWaitForGPU();
242       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
243       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
244       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
245       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
246 
247     } catch(std::exception const & ex) {
248       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
249     }
250     ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
251   } else {
252     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
253   }
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
258 {
259   PetscErrorCode ierr;
260 
261   PetscFunctionBegin;
262   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
263   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
264   if (!A->boundtocpu) {
265     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 /* --------------------------------------------------------------------------------*/
271 /*@C
272    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
273    (the default parallel PETSc format).  This matrix will ultimately be pushed down
274    to GPUs and use the ViennaCL library for calculations. For good matrix
275    assembly performance the user should preallocate the matrix storage by setting
276    the parameter nz (or the array nnz).  By setting these parameters accurately,
277    performance during matrix assembly can be increased substantially.
278 
279 
280    Collective
281 
282    Input Parameters:
283 +  comm - MPI communicator, set to PETSC_COMM_SELF
284 .  m - number of rows
285 .  n - number of columns
286 .  nz - number of nonzeros per row (same for all rows)
287 -  nnz - array containing the number of nonzeros in the various rows
288          (possibly different for each row) or NULL
289 
290    Output Parameter:
291 .  A - the matrix
292 
293    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
294    MatXXXXSetPreallocation() paradigm instead of this routine directly.
295    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
296 
297    Notes:
298    If nnz is given then nz is ignored
299 
300    The AIJ format (also called the Yale sparse matrix format or
301    compressed row storage), is fully compatible with standard Fortran 77
302    storage.  That is, the stored row and column indices can begin at
303    either one (as in Fortran) or zero.  See the users' manual for details.
304 
305    Specify the preallocated storage with either nz or nnz (not both).
306    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
307    allocation.  For large problems you MUST preallocate memory or you
308    will get TERRIBLE performance, see the users' manual chapter on matrices.
309 
310    Level: intermediate
311 
312 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
313 
314 @*/
315 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
316 {
317   PetscErrorCode ierr;
318 
319   PetscFunctionBegin;
320   ierr = MatCreate(comm,A);CHKERRQ(ierr);
321   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
322   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
323   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 
328 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
329 {
330   PetscErrorCode ierr;
331   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
332 
333   PetscFunctionBegin;
334   try {
335     if (viennaclcontainer) {
336       delete viennaclcontainer->tempvec;
337       delete viennaclcontainer->mat;
338       delete viennaclcontainer->compressed_mat;
339       delete viennaclcontainer;
340     }
341     A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
342   } catch(std::exception const & ex) {
343     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
344   }
345 
346   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
347 
348   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
349   A->spptr = 0;
350   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
351   PetscFunctionReturn(0);
352 }
353 
354 
355 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
356 {
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
361   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
366 static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
367 {
368   PetscErrorCode ierr;
369   Mat            C;
370 
371   PetscFunctionBegin;
372   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
373   C = *B;
374 
375   ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
376   C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
377 
378   C->spptr = new Mat_SeqAIJViennaCL();
379   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
380   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
381   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
382 
383   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
384 
385   C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
386 
387   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
388   if (C->assembled) {
389     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
390   }
391 
392   PetscFunctionReturn(0);
393 }
394 
395 static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
396 {
397   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
398   PetscErrorCode ierr;
399 
400   PetscFunctionBegin;
401   ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
402   *array = a->a;
403   A->offloadmask = PETSC_OFFLOAD_CPU;
404   PetscFunctionReturn(0);
405 }
406 
407 
408 static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
409 {
410   PetscFunctionBegin;
411   *array = NULL;
412   PetscFunctionReturn(0);
413 }
414 
415 static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
416 {
417   PetscErrorCode ierr;
418 
419   PetscFunctionBegin;
420   A->boundtocpu = flg;
421   if (flg) {
422     /* make sure we have an up-to-date copy on the CPU */
423     ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix *)NULL);CHKERRQ(ierr);
424     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
425     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);
426 
427     A->ops->mult        = MatMult_SeqAIJ;
428     A->ops->multadd     = MatMultAdd_SeqAIJ;
429     A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
430     A->ops->duplicate   = MatDuplicate_SeqAIJ;
431   } else {
432     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
433     ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);
434 
435     A->ops->mult        = MatMult_SeqAIJViennaCL;
436     A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
437     A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
438     A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
439     A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
445 {
446   PetscErrorCode ierr;
447   Mat            B;
448   Mat_SeqAIJ     *aij;
449 
450   PetscFunctionBegin;
451 
452   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
453 
454   if (reuse == MAT_INITIAL_MATRIX) {
455     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
456   }
457 
458   B = *newmat;
459 
460   aij             = (Mat_SeqAIJ*)B->data;
461   aij->inode.use  = PETSC_FALSE;
462 
463   B->spptr        = new Mat_SeqAIJViennaCL();
464 
465   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
466   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
467   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
468 
469   ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
470   A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;
471 
472   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
473   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
474   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
475 
476   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
477 
478   B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
479 
480   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
481   if (B->assembled) {
482     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
483   }
484 
485   PetscFunctionReturn(0);
486 }
487 
488 
489 /*MC
490    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
491 
492    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
493    default. All matrix calculations are performed using the ViennaCL library.
494 
495    Options Database Keys:
496 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
497 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
498 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
499 
500   Level: beginner
501 
502 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
503 M*/
504 
505 
506 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
507 {
508   PetscErrorCode ierr;
509 
510   PetscFunctionBegin;
511   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
512   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
513   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
514   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517