xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision f7daeb2ae28109095165dd56e759bfa0c55f9e9d)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8e4a0ef16SKarl Rupp #include "petscconf.h"
9e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
10e4a0ef16SKarl Rupp #include "petscbt.h"
11e4a0ef16SKarl Rupp #include "../src/vec/vec/impls/dvecimpl.h"
12e4a0ef16SKarl Rupp #include "petsc-private/vecimpl.h"
13e4a0ef16SKarl Rupp 
14e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
23e4a0ef16SKarl Rupp #undef __FUNCT__
24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU"
25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26e4a0ef16SKarl Rupp {
27e4a0ef16SKarl Rupp 
28e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30e4a0ef16SKarl Rupp   PetscInt           *ii;
31e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
32e4a0ef16SKarl Rupp 
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp   PetscFunctionBegin;
3567c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
36e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
37e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38e4a0ef16SKarl Rupp 
39e4a0ef16SKarl Rupp       try {
40e4a0ef16SKarl Rupp         viennaclstruct->mat = new ViennaCLAIJMatrix();
41e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
42e4a0ef16SKarl Rupp           ii = a->compressedrow.i;
43e4a0ef16SKarl Rupp 
44e4a0ef16SKarl Rupp           viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
45e4a0ef16SKarl Rupp 
46e4a0ef16SKarl Rupp           // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
47023073b3SKarl Rupp           //       Cannot be reasonably supported in ViennaCL 1.4.x (custom kernels required), hence postponing until there is support for compressed CSR
48e4a0ef16SKarl Rupp 
49e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
50e4a0ef16SKarl Rupp         } else {
51e4a0ef16SKarl Rupp 
52e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
53e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
54e4a0ef16SKarl Rupp 
55e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
56e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
57e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
58e4a0ef16SKarl Rupp 
59e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
60e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
61e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
62e4a0ef16SKarl Rupp 
63e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
64e4a0ef16SKarl Rupp         }
654cf1874eSKarl Rupp         ViennaCLWaitForGPU();
664076e183SKarl Rupp       } catch(std::exception const & ex) {
674076e183SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
68e4a0ef16SKarl Rupp       }
69e4a0ef16SKarl Rupp 
70e4a0ef16SKarl Rupp       A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
71e4a0ef16SKarl Rupp 
72e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
73e4a0ef16SKarl Rupp     }
7467c87b7fSKarl Rupp   }
75e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
76e4a0ef16SKarl Rupp }
77e4a0ef16SKarl Rupp 
78e4a0ef16SKarl Rupp #undef __FUNCT__
79e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU"
800d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
81e4a0ef16SKarl Rupp {
82e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
83e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
84e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
85e4a0ef16SKarl Rupp 
86e4a0ef16SKarl Rupp 
87e4a0ef16SKarl Rupp   PetscFunctionBegin;
88e4a0ef16SKarl Rupp   if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
89e4a0ef16SKarl Rupp     try {
90e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
91e4a0ef16SKarl Rupp         SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
92e4a0ef16SKarl Rupp       } else {
93e4a0ef16SKarl Rupp 
94e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
95e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
96e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
97e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
98e4a0ef16SKarl Rupp         if (a->singlemalloc) {
99e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
100e4a0ef16SKarl Rupp         } else {
101e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
102e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
103e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
104e4a0ef16SKarl Rupp         }
105e4a0ef16SKarl Rupp         ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
106*f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
107e4a0ef16SKarl Rupp 
108e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
109e4a0ef16SKarl Rupp 
110e4a0ef16SKarl Rupp         /* Setup row lengths */
111e4a0ef16SKarl Rupp         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
112e4a0ef16SKarl Rupp         ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
113*f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
114e4a0ef16SKarl Rupp 
115e4a0ef16SKarl Rupp         /* Copy data back from GPU */
116e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
117e4a0ef16SKarl Rupp 
118e4a0ef16SKarl Rupp         // copy row array
119e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
120e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
121e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
122e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
123e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
124e4a0ef16SKarl Rupp         }
125e4a0ef16SKarl Rupp 
126e4a0ef16SKarl Rupp         // copy column indices
127e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
128e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
129e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
130e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
131e4a0ef16SKarl Rupp 
132e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
133e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
134e4a0ef16SKarl Rupp 
1354cf1874eSKarl Rupp         ViennaCLWaitForGPU();
136023073b3SKarl Rupp         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
137e4a0ef16SKarl Rupp       }
1384076e183SKarl Rupp     } catch(std::exception const & ex) {
1394076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
140e4a0ef16SKarl Rupp     }
141e4a0ef16SKarl Rupp 
142e4a0ef16SKarl Rupp     /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
143e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
144e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
145e4a0ef16SKarl Rupp 
146e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
147e4a0ef16SKarl Rupp   } else {
148e4a0ef16SKarl Rupp     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
149e4a0ef16SKarl Rupp   }
150e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
151e4a0ef16SKarl Rupp }
152e4a0ef16SKarl Rupp 
153e4a0ef16SKarl Rupp #undef __FUNCT__
154e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
155e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
156e4a0ef16SKarl Rupp {
157e4a0ef16SKarl Rupp   PetscErrorCode ierr;
158e4a0ef16SKarl Rupp 
159e4a0ef16SKarl Rupp   PetscFunctionBegin;
160e4a0ef16SKarl Rupp   if (right) {
161e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
162e4a0ef16SKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
163e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
164e4a0ef16SKarl Rupp     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
165e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
166e4a0ef16SKarl Rupp   }
167e4a0ef16SKarl Rupp   if (left) {
168e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
169e4a0ef16SKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
170e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
171e4a0ef16SKarl Rupp     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
172e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
173e4a0ef16SKarl Rupp   }
174e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
175e4a0ef16SKarl Rupp }
176e4a0ef16SKarl Rupp 
177e4a0ef16SKarl Rupp #undef __FUNCT__
178e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL"
179e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
180e4a0ef16SKarl Rupp {
181e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
182e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
183e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
1840d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL;
1850d73d530SKarl Rupp   ViennaCLVector       *ygpu=NULL;
186e4a0ef16SKarl Rupp 
187e4a0ef16SKarl Rupp   PetscFunctionBegin;
18867c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
189e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
190e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
191e4a0ef16SKarl Rupp     try {
192e4a0ef16SKarl Rupp       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
1934cf1874eSKarl Rupp       ViennaCLWaitForGPU();
1944076e183SKarl Rupp     } catch (std::exception const & ex) {
1954076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
196e4a0ef16SKarl Rupp     }
197e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
198e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
199e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
20067c87b7fSKarl Rupp   }
201e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
202e4a0ef16SKarl Rupp }
203e4a0ef16SKarl Rupp 
204e4a0ef16SKarl Rupp 
205e4a0ef16SKarl Rupp 
206e4a0ef16SKarl Rupp #undef __FUNCT__
207e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
208e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
209e4a0ef16SKarl Rupp {
210e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
211e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
212e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
2130d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
2140d73d530SKarl Rupp   ViennaCLVector       *zgpu=NULL;
215e4a0ef16SKarl Rupp 
216e4a0ef16SKarl Rupp   PetscFunctionBegin;
21767c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
218e4a0ef16SKarl Rupp     try {
219e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
220e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
221e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
222e4a0ef16SKarl Rupp 
223e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
224e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
225e4a0ef16SKarl Rupp       } else {
226e4a0ef16SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
227e4a0ef16SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
228e4a0ef16SKarl Rupp           *zgpu = *ygpu + temp;
2294cf1874eSKarl Rupp           ViennaCLWaitForGPU();
230e4a0ef16SKarl Rupp         } else {
231e4a0ef16SKarl Rupp           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
232e4a0ef16SKarl Rupp           *zgpu += *ygpu;
2334cf1874eSKarl Rupp           ViennaCLWaitForGPU();
234e4a0ef16SKarl Rupp         }
235e4a0ef16SKarl Rupp       }
236e4a0ef16SKarl Rupp 
237e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
238e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
239e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
240e4a0ef16SKarl Rupp 
2414076e183SKarl Rupp     } catch(std::exception const & ex) {
2424076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
243e4a0ef16SKarl Rupp     }
244e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
24567c87b7fSKarl Rupp   }
246e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
247e4a0ef16SKarl Rupp }
248e4a0ef16SKarl Rupp 
249e4a0ef16SKarl Rupp #undef __FUNCT__
250e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
251e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
252e4a0ef16SKarl Rupp {
253e4a0ef16SKarl Rupp   PetscErrorCode ierr;
254e4a0ef16SKarl Rupp 
255e4a0ef16SKarl Rupp   PetscFunctionBegin;
256e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
257e4a0ef16SKarl Rupp   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
258e4a0ef16SKarl Rupp   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
259e4a0ef16SKarl Rupp   A->ops->mult    = MatMult_SeqAIJViennaCL;
260e4a0ef16SKarl Rupp   A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
261e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
262e4a0ef16SKarl Rupp }
263e4a0ef16SKarl Rupp 
264e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
265e4a0ef16SKarl Rupp #undef __FUNCT__
266e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL"
267e4a0ef16SKarl Rupp /*@
268e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
26919fddfadSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
270e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
271e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
272e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
273e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
274e4a0ef16SKarl Rupp 
275e4a0ef16SKarl Rupp 
276e4a0ef16SKarl Rupp    Collective on MPI_Comm
277e4a0ef16SKarl Rupp 
278e4a0ef16SKarl Rupp    Input Parameters:
279e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
280e4a0ef16SKarl Rupp .  m - number of rows
281e4a0ef16SKarl Rupp .  n - number of columns
282e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
283e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
284e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
285e4a0ef16SKarl Rupp 
286e4a0ef16SKarl Rupp    Output Parameter:
287e4a0ef16SKarl Rupp .  A - the matrix
288e4a0ef16SKarl Rupp 
289e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
290e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
291e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
292e4a0ef16SKarl Rupp 
293e4a0ef16SKarl Rupp    Notes:
294e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
295e4a0ef16SKarl Rupp 
296e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
297e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
298e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
299e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
300e4a0ef16SKarl Rupp 
301e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
302e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
303e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
304e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
305e4a0ef16SKarl Rupp 
306e4a0ef16SKarl Rupp    Level: intermediate
307e4a0ef16SKarl Rupp 
308e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
309e4a0ef16SKarl Rupp 
310e4a0ef16SKarl Rupp @*/
311e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
312e4a0ef16SKarl Rupp {
313e4a0ef16SKarl Rupp   PetscErrorCode ierr;
314e4a0ef16SKarl Rupp 
315e4a0ef16SKarl Rupp   PetscFunctionBegin;
316e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
317e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
318e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
319e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
320e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
321e4a0ef16SKarl Rupp }
322e4a0ef16SKarl Rupp 
323e4a0ef16SKarl Rupp 
324e4a0ef16SKarl Rupp #undef __FUNCT__
325e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
326e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
327e4a0ef16SKarl Rupp {
328e4a0ef16SKarl Rupp   PetscErrorCode ierr;
329e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
330e4a0ef16SKarl Rupp 
331e4a0ef16SKarl Rupp   PetscFunctionBegin;
332e4a0ef16SKarl Rupp   try {
333e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
334e4a0ef16SKarl Rupp     delete viennaclcontainer;
335e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
3364076e183SKarl Rupp   } catch(std::exception const & ex) {
3374076e183SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
338e4a0ef16SKarl Rupp   }
339e4a0ef16SKarl Rupp   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
340e4a0ef16SKarl Rupp   A->spptr = 0;
341e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
342e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
343e4a0ef16SKarl Rupp }
344e4a0ef16SKarl Rupp 
345e4a0ef16SKarl Rupp 
346e4a0ef16SKarl Rupp #undef __FUNCT__
347e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL"
348e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
349e4a0ef16SKarl Rupp {
350e4a0ef16SKarl Rupp   PetscErrorCode ierr;
351e4a0ef16SKarl Rupp   Mat_SeqAIJ     *aij;
352e4a0ef16SKarl Rupp 
353e4a0ef16SKarl Rupp   PetscFunctionBegin;
354e4a0ef16SKarl Rupp   ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
355e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
356e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
357e4a0ef16SKarl Rupp   B->ops->mult    = MatMult_SeqAIJViennaCL;
358e4a0ef16SKarl Rupp   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
359e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
360e4a0ef16SKarl Rupp 
361e4a0ef16SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0;
362e4a0ef16SKarl Rupp 
363e4a0ef16SKarl Rupp   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
364e4a0ef16SKarl Rupp   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
365e4a0ef16SKarl Rupp   B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
366e4a0ef16SKarl Rupp 
36765e3cb35SKarl Rupp   ierr = MatSetFromOptions_SeqViennaCL(B);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */
368e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
369e4a0ef16SKarl Rupp 
370e4a0ef16SKarl Rupp   B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
371e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
372e4a0ef16SKarl Rupp }
373e4a0ef16SKarl Rupp 
374e4a0ef16SKarl Rupp 
375e4a0ef16SKarl Rupp /*M
376e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
377e4a0ef16SKarl Rupp 
378e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
379e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
380e4a0ef16SKarl Rupp 
381e4a0ef16SKarl Rupp    Options Database Keys:
382e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
383e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
384e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
385e4a0ef16SKarl Rupp 
386e4a0ef16SKarl Rupp   Level: beginner
387e4a0ef16SKarl Rupp 
388e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
389e4a0ef16SKarl Rupp M*/
390e4a0ef16SKarl Rupp 
391