xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 67c87b7f722561d11437735eb3bab5a6f62e8669)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8e4a0ef16SKarl Rupp #include "petscconf.h"
9e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
10e4a0ef16SKarl Rupp #include "petscbt.h"
11e4a0ef16SKarl Rupp #include "../src/vec/vec/impls/dvecimpl.h"
12e4a0ef16SKarl Rupp #include "petsc-private/vecimpl.h"
13e4a0ef16SKarl Rupp 
14e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
23e4a0ef16SKarl Rupp #undef __FUNCT__
24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU"
25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26e4a0ef16SKarl Rupp {
27e4a0ef16SKarl Rupp 
28e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30e4a0ef16SKarl Rupp   //PetscInt           m               = A->rmap->n,*ii,*ridx;
31e4a0ef16SKarl Rupp   PetscInt           *ii;
32e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp 
35e4a0ef16SKarl Rupp   PetscFunctionBegin;
36*67c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
37e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
38e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
39e4a0ef16SKarl Rupp 
40e4a0ef16SKarl Rupp       try {
41e4a0ef16SKarl Rupp         //viennaclstruct->nonzerorow=0;
42e4a0ef16SKarl Rupp         //for (PetscInt j = 0; j<m; j++) viennaclstruct->nonzerorow += (a->i[j+1] > a->i[j]);
43e4a0ef16SKarl Rupp 
44e4a0ef16SKarl Rupp         viennaclstruct->mat = new ViennaCLAIJMatrix();
45e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
46e4a0ef16SKarl Rupp           //m    = a->compressedrow.nrows;
47e4a0ef16SKarl Rupp           ii   = a->compressedrow.i;
48e4a0ef16SKarl Rupp           //ridx = a->compressedrow.rindex;
49e4a0ef16SKarl Rupp 
50e4a0ef16SKarl Rupp           viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
51e4a0ef16SKarl Rupp 
52e4a0ef16SKarl Rupp           // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
53e4a0ef16SKarl Rupp 
54e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
55e4a0ef16SKarl Rupp         } else {
56e4a0ef16SKarl Rupp 
57e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
58e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
59e4a0ef16SKarl Rupp 
60e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
61e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
62e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
63e4a0ef16SKarl Rupp 
64e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
65e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
66e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
67e4a0ef16SKarl Rupp 
68e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
69e4a0ef16SKarl Rupp         }
70e4a0ef16SKarl Rupp       } catch(char *ex) {
71e4a0ef16SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
72e4a0ef16SKarl Rupp       }
73e4a0ef16SKarl Rupp 
74e4a0ef16SKarl Rupp       A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
75e4a0ef16SKarl Rupp 
76e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
77e4a0ef16SKarl Rupp     }
78*67c87b7fSKarl Rupp   }
79e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
80e4a0ef16SKarl Rupp }
81e4a0ef16SKarl Rupp 
82e4a0ef16SKarl Rupp #undef __FUNCT__
83e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU"
84e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, ViennaCLAIJMatrix *Agpu)
85e4a0ef16SKarl Rupp {
86e4a0ef16SKarl Rupp   //Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
87e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
88e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
89e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
90e4a0ef16SKarl Rupp 
91e4a0ef16SKarl Rupp 
92e4a0ef16SKarl Rupp   PetscFunctionBegin;
93e4a0ef16SKarl Rupp   if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
94e4a0ef16SKarl Rupp     try {
95e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
96e4a0ef16SKarl Rupp         SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
97e4a0ef16SKarl Rupp       } else {
98e4a0ef16SKarl Rupp 
99e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
100e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
101e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
102e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
103e4a0ef16SKarl Rupp         if (a->singlemalloc) {
104e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
105e4a0ef16SKarl Rupp         } else {
106e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
107e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
108e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
109e4a0ef16SKarl Rupp         }
110e4a0ef16SKarl Rupp         ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
111e4a0ef16SKarl Rupp         ierr = PetscLogObjectMemory(A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
112e4a0ef16SKarl Rupp 
113e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
114e4a0ef16SKarl Rupp 
115e4a0ef16SKarl Rupp         /* Setup row lengths */
116e4a0ef16SKarl Rupp         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
117e4a0ef16SKarl Rupp         ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
118e4a0ef16SKarl Rupp         ierr = PetscLogObjectMemory(A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
119e4a0ef16SKarl Rupp 
120e4a0ef16SKarl Rupp         /* Copy data back from GPU */
121e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
122e4a0ef16SKarl Rupp 
123e4a0ef16SKarl Rupp         // copy row array
124e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
125e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
126e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
127e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
128e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
129e4a0ef16SKarl Rupp         }
130e4a0ef16SKarl Rupp 
131e4a0ef16SKarl Rupp         // copy column indices
132e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
133e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
134e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
135e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
136e4a0ef16SKarl Rupp 
137e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
138e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
139e4a0ef16SKarl Rupp 
140e4a0ef16SKarl Rupp         /* TODO: What about a->diag? */
141e4a0ef16SKarl Rupp       }
142e4a0ef16SKarl Rupp     } catch(char *ex) {
143e4a0ef16SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
144e4a0ef16SKarl Rupp     }
145e4a0ef16SKarl Rupp 
146e4a0ef16SKarl Rupp     /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
147e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
148e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
149e4a0ef16SKarl Rupp 
150e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
151e4a0ef16SKarl Rupp   } else {
152e4a0ef16SKarl Rupp     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
153e4a0ef16SKarl Rupp   }
154e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
155e4a0ef16SKarl Rupp }
156e4a0ef16SKarl Rupp 
157e4a0ef16SKarl Rupp #undef __FUNCT__
158e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
159e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
160e4a0ef16SKarl Rupp {
161e4a0ef16SKarl Rupp   PetscErrorCode ierr;
162e4a0ef16SKarl Rupp 
163e4a0ef16SKarl Rupp   PetscFunctionBegin;
164e4a0ef16SKarl Rupp   if (right) {
165e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
166e4a0ef16SKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
167e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
168e4a0ef16SKarl Rupp     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
169e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
170e4a0ef16SKarl Rupp   }
171e4a0ef16SKarl Rupp   if (left) {
172e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
173e4a0ef16SKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
174e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
175e4a0ef16SKarl Rupp     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
176e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
177e4a0ef16SKarl Rupp   }
178e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
179e4a0ef16SKarl Rupp }
180e4a0ef16SKarl Rupp 
181e4a0ef16SKarl Rupp #undef __FUNCT__
182e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL"
183e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
184e4a0ef16SKarl Rupp {
185e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
186e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
187e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
188e4a0ef16SKarl Rupp   ViennaCLVector     *xgpu=NULL,*ygpu=NULL;
189e4a0ef16SKarl Rupp 
190e4a0ef16SKarl Rupp   PetscFunctionBegin;
191*67c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
192e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
193e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
194e4a0ef16SKarl Rupp     try {
195e4a0ef16SKarl Rupp       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
196e4a0ef16SKarl Rupp     } catch (char *ex) {
197e4a0ef16SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
198e4a0ef16SKarl Rupp     }
199e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
200e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
201e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
202*67c87b7fSKarl Rupp   }
203e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
204e4a0ef16SKarl Rupp }
205e4a0ef16SKarl Rupp 
206e4a0ef16SKarl Rupp 
207e4a0ef16SKarl Rupp 
208e4a0ef16SKarl Rupp #undef __FUNCT__
209e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
210e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
211e4a0ef16SKarl Rupp {
212e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
213e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
214e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
215e4a0ef16SKarl Rupp   ViennaCLVector     *xgpu=NULL,*ygpu=NULL,*zgpu=NULL;
216e4a0ef16SKarl Rupp 
217e4a0ef16SKarl Rupp   PetscFunctionBegin;
218*67c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
219e4a0ef16SKarl Rupp     try {
220e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
221e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
222e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
223e4a0ef16SKarl Rupp 
224e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
225e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
226e4a0ef16SKarl Rupp       } else {
227e4a0ef16SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
228e4a0ef16SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
229e4a0ef16SKarl Rupp           *zgpu = *ygpu + temp;
230e4a0ef16SKarl Rupp         } else {
231e4a0ef16SKarl Rupp           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
232e4a0ef16SKarl Rupp           *zgpu += *ygpu;
233e4a0ef16SKarl Rupp         }
234e4a0ef16SKarl Rupp       }
235e4a0ef16SKarl Rupp 
236e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
237e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
238e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
239e4a0ef16SKarl Rupp 
240e4a0ef16SKarl Rupp     } catch(char *ex) {
241e4a0ef16SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
242e4a0ef16SKarl Rupp     }
243e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
244*67c87b7fSKarl Rupp   }
245e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
246e4a0ef16SKarl Rupp }
247e4a0ef16SKarl Rupp 
248e4a0ef16SKarl Rupp #undef __FUNCT__
249e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
250e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
251e4a0ef16SKarl Rupp {
252e4a0ef16SKarl Rupp   PetscErrorCode ierr;
253e4a0ef16SKarl Rupp 
254e4a0ef16SKarl Rupp   PetscFunctionBegin;
255e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
256e4a0ef16SKarl Rupp   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
257e4a0ef16SKarl Rupp   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
258e4a0ef16SKarl Rupp   A->ops->mult    = MatMult_SeqAIJViennaCL;
259e4a0ef16SKarl Rupp   A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
260e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
261e4a0ef16SKarl Rupp }
262e4a0ef16SKarl Rupp 
263e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
264e4a0ef16SKarl Rupp #undef __FUNCT__
265e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL"
266e4a0ef16SKarl Rupp /*@
267e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
268e4a0ef16SKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately pushed down
269e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
270e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
271e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
272e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
273e4a0ef16SKarl Rupp 
274e4a0ef16SKarl Rupp 
275e4a0ef16SKarl Rupp    Collective on MPI_Comm
276e4a0ef16SKarl Rupp 
277e4a0ef16SKarl Rupp    Input Parameters:
278e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
279e4a0ef16SKarl Rupp .  m - number of rows
280e4a0ef16SKarl Rupp .  n - number of columns
281e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
282e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
283e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
284e4a0ef16SKarl Rupp 
285e4a0ef16SKarl Rupp    Output Parameter:
286e4a0ef16SKarl Rupp .  A - the matrix
287e4a0ef16SKarl Rupp 
288e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
289e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
290e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
291e4a0ef16SKarl Rupp 
292e4a0ef16SKarl Rupp    Notes:
293e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
294e4a0ef16SKarl Rupp 
295e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
296e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
297e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
298e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
299e4a0ef16SKarl Rupp 
300e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
301e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
302e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
303e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
304e4a0ef16SKarl Rupp 
305e4a0ef16SKarl Rupp    By default, this format uses inodes (identical nodes) when possible, to
306e4a0ef16SKarl Rupp    improve numerical efficiency of matrix-vector products and solves. We
307e4a0ef16SKarl Rupp    search for consecutive rows with the same nonzero structure, thereby
308e4a0ef16SKarl Rupp    reusing matrix information to achieve increased efficiency.
309e4a0ef16SKarl Rupp 
310e4a0ef16SKarl Rupp    Level: intermediate
311e4a0ef16SKarl Rupp 
312e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
313e4a0ef16SKarl Rupp 
314e4a0ef16SKarl Rupp @*/
315e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
316e4a0ef16SKarl Rupp {
317e4a0ef16SKarl Rupp   PetscErrorCode ierr;
318e4a0ef16SKarl Rupp 
319e4a0ef16SKarl Rupp   PetscFunctionBegin;
320e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
321e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
322e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
323e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
324e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
325e4a0ef16SKarl Rupp }
326e4a0ef16SKarl Rupp 
327e4a0ef16SKarl Rupp 
328e4a0ef16SKarl Rupp #undef __FUNCT__
329e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
330e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
331e4a0ef16SKarl Rupp {
332e4a0ef16SKarl Rupp   PetscErrorCode ierr;
333e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
334e4a0ef16SKarl Rupp 
335e4a0ef16SKarl Rupp   PetscFunctionBegin;
336e4a0ef16SKarl Rupp   try {
337e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
338e4a0ef16SKarl Rupp     delete viennaclcontainer;
339e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
340e4a0ef16SKarl Rupp   } catch(char *ex) {
341e4a0ef16SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
342e4a0ef16SKarl Rupp   }
343e4a0ef16SKarl Rupp   /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
344e4a0ef16SKarl Rupp   A->spptr = 0;
345e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
346e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
347e4a0ef16SKarl Rupp }
348e4a0ef16SKarl Rupp 
349e4a0ef16SKarl Rupp 
350e4a0ef16SKarl Rupp #undef __FUNCT__
351e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL"
352e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
353e4a0ef16SKarl Rupp {
354e4a0ef16SKarl Rupp   PetscErrorCode ierr;
355e4a0ef16SKarl Rupp   Mat_SeqAIJ     *aij;
356e4a0ef16SKarl Rupp 
357e4a0ef16SKarl Rupp   PetscFunctionBegin;
358e4a0ef16SKarl Rupp   ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
359e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
360e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
361e4a0ef16SKarl Rupp   B->ops->mult    = MatMult_SeqAIJViennaCL;
362e4a0ef16SKarl Rupp   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
363e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
364e4a0ef16SKarl Rupp 
365e4a0ef16SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat     = 0;
366e4a0ef16SKarl Rupp 
367e4a0ef16SKarl Rupp   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
368e4a0ef16SKarl Rupp   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
369e4a0ef16SKarl Rupp   B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
370e4a0ef16SKarl Rupp 
371e4a0ef16SKarl Rupp   //ierr = PetscObjectComposeFunction((PetscObject)B,"MatViennaCLSetFormat_C", "MatViennaCLSetFormat_SeqAIJViennaCL", MatViennaCLSetFormat_SeqAIJViennaCL);CHKERRQ(ierr);
372e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
373e4a0ef16SKarl Rupp 
374e4a0ef16SKarl Rupp   B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
375e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
376e4a0ef16SKarl Rupp }
377e4a0ef16SKarl Rupp 
378e4a0ef16SKarl Rupp 
379e4a0ef16SKarl Rupp /*M
380e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
381e4a0ef16SKarl Rupp 
382e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
383e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
384e4a0ef16SKarl Rupp 
385e4a0ef16SKarl Rupp    Options Database Keys:
386e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
387e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
388e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
389e4a0ef16SKarl Rupp 
390e4a0ef16SKarl Rupp   Level: beginner
391e4a0ef16SKarl Rupp 
392e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
393e4a0ef16SKarl Rupp M*/
394e4a0ef16SKarl Rupp 
395