xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 19fddfadf26cdedd36d5f24fa1f2bdedfcd3712b)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8e4a0ef16SKarl Rupp #include "petscconf.h"
9e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
10e4a0ef16SKarl Rupp #include "petscbt.h"
11e4a0ef16SKarl Rupp #include "../src/vec/vec/impls/dvecimpl.h"
12e4a0ef16SKarl Rupp #include "petsc-private/vecimpl.h"
13e4a0ef16SKarl Rupp 
14e4a0ef16SKarl Rupp #include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
23e4a0ef16SKarl Rupp #undef __FUNCT__
24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU"
25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26e4a0ef16SKarl Rupp {
27e4a0ef16SKarl Rupp 
28e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30e4a0ef16SKarl Rupp   PetscInt           *ii;
31e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
32e4a0ef16SKarl Rupp 
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp   PetscFunctionBegin;
3567c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
36e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
37e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38e4a0ef16SKarl Rupp 
39e4a0ef16SKarl Rupp       try {
40e4a0ef16SKarl Rupp         viennaclstruct->mat = new ViennaCLAIJMatrix();
41e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
42e4a0ef16SKarl Rupp           ii = a->compressedrow.i;
43e4a0ef16SKarl Rupp 
44e4a0ef16SKarl Rupp           viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
45e4a0ef16SKarl Rupp 
46e4a0ef16SKarl Rupp           // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
47e4a0ef16SKarl Rupp 
48e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
49e4a0ef16SKarl Rupp         } else {
50e4a0ef16SKarl Rupp 
51e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
52e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
53e4a0ef16SKarl Rupp 
54e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
55e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
56e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
57e4a0ef16SKarl Rupp 
58e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
59e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
60e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
61e4a0ef16SKarl Rupp 
62e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
63e4a0ef16SKarl Rupp         }
644076e183SKarl Rupp       } catch(std::exception const & ex) {
654076e183SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
66e4a0ef16SKarl Rupp       }
67e4a0ef16SKarl Rupp 
68e4a0ef16SKarl Rupp       A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
69e4a0ef16SKarl Rupp 
70e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
71e4a0ef16SKarl Rupp     }
7267c87b7fSKarl Rupp   }
73e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
74e4a0ef16SKarl Rupp }
75e4a0ef16SKarl Rupp 
76e4a0ef16SKarl Rupp #undef __FUNCT__
77e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU"
78e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, ViennaCLAIJMatrix *Agpu)
79e4a0ef16SKarl Rupp {
80e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
81e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
82e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
83e4a0ef16SKarl Rupp 
84e4a0ef16SKarl Rupp 
85e4a0ef16SKarl Rupp   PetscFunctionBegin;
86e4a0ef16SKarl Rupp   if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
87e4a0ef16SKarl Rupp     try {
88e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
89e4a0ef16SKarl Rupp         SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
90e4a0ef16SKarl Rupp       } else {
91e4a0ef16SKarl Rupp 
92e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
93e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
94e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
95e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
96e4a0ef16SKarl Rupp         if (a->singlemalloc) {
97e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
98e4a0ef16SKarl Rupp         } else {
99e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
100e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
101e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
102e4a0ef16SKarl Rupp         }
103e4a0ef16SKarl Rupp         ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
104e4a0ef16SKarl Rupp         ierr = PetscLogObjectMemory(A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
105e4a0ef16SKarl Rupp 
106e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
107e4a0ef16SKarl Rupp 
108e4a0ef16SKarl Rupp         /* Setup row lengths */
109e4a0ef16SKarl Rupp         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
110e4a0ef16SKarl Rupp         ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
111e4a0ef16SKarl Rupp         ierr = PetscLogObjectMemory(A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
112e4a0ef16SKarl Rupp 
113e4a0ef16SKarl Rupp         /* Copy data back from GPU */
114e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
115e4a0ef16SKarl Rupp 
116e4a0ef16SKarl Rupp         // copy row array
117e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
118e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
119e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
120e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
121e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
122e4a0ef16SKarl Rupp         }
123e4a0ef16SKarl Rupp 
124e4a0ef16SKarl Rupp         // copy column indices
125e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
126e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
127e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
128e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
129e4a0ef16SKarl Rupp 
130e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
131e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
132e4a0ef16SKarl Rupp 
133e4a0ef16SKarl Rupp         /* TODO: What about a->diag? */
134e4a0ef16SKarl Rupp       }
1354076e183SKarl Rupp     } catch(std::exception const & ex) {
1364076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
137e4a0ef16SKarl Rupp     }
138e4a0ef16SKarl Rupp 
139e4a0ef16SKarl Rupp     /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
140e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
141e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
142e4a0ef16SKarl Rupp 
143e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
144e4a0ef16SKarl Rupp   } else {
145e4a0ef16SKarl Rupp     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
146e4a0ef16SKarl Rupp   }
147e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
148e4a0ef16SKarl Rupp }
149e4a0ef16SKarl Rupp 
150e4a0ef16SKarl Rupp #undef __FUNCT__
151e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
152e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
153e4a0ef16SKarl Rupp {
154e4a0ef16SKarl Rupp   PetscErrorCode ierr;
155e4a0ef16SKarl Rupp 
156e4a0ef16SKarl Rupp   PetscFunctionBegin;
157e4a0ef16SKarl Rupp   if (right) {
158e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
159e4a0ef16SKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
160e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
161e4a0ef16SKarl Rupp     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
162e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
163e4a0ef16SKarl Rupp   }
164e4a0ef16SKarl Rupp   if (left) {
165e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
166e4a0ef16SKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
167e4a0ef16SKarl Rupp     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
168e4a0ef16SKarl Rupp     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
169e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
170e4a0ef16SKarl Rupp   }
171e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
172e4a0ef16SKarl Rupp }
173e4a0ef16SKarl Rupp 
174e4a0ef16SKarl Rupp #undef __FUNCT__
175e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL"
176e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
177e4a0ef16SKarl Rupp {
178e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
179e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
180e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
181e4a0ef16SKarl Rupp   ViennaCLVector     *xgpu=NULL,*ygpu=NULL;
182e4a0ef16SKarl Rupp 
183e4a0ef16SKarl Rupp   PetscFunctionBegin;
18467c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
185e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
186e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
187e4a0ef16SKarl Rupp     try {
188e4a0ef16SKarl Rupp       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
1894076e183SKarl Rupp     } catch (std::exception const & ex) {
1904076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
191e4a0ef16SKarl Rupp     }
192e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
193e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
194e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
19567c87b7fSKarl Rupp   }
196e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
197e4a0ef16SKarl Rupp }
198e4a0ef16SKarl Rupp 
199e4a0ef16SKarl Rupp 
200e4a0ef16SKarl Rupp 
201e4a0ef16SKarl Rupp #undef __FUNCT__
202e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
203e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
204e4a0ef16SKarl Rupp {
205e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
206e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
207e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
208e4a0ef16SKarl Rupp   ViennaCLVector     *xgpu=NULL,*ygpu=NULL,*zgpu=NULL;
209e4a0ef16SKarl Rupp 
210e4a0ef16SKarl Rupp   PetscFunctionBegin;
21167c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
212e4a0ef16SKarl Rupp     try {
213e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
214e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
215e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
216e4a0ef16SKarl Rupp 
217e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
218e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
219e4a0ef16SKarl Rupp       } else {
220e4a0ef16SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
221e4a0ef16SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
222e4a0ef16SKarl Rupp           *zgpu = *ygpu + temp;
223e4a0ef16SKarl Rupp         } else {
224e4a0ef16SKarl Rupp           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
225e4a0ef16SKarl Rupp           *zgpu += *ygpu;
226e4a0ef16SKarl Rupp         }
227e4a0ef16SKarl Rupp       }
228e4a0ef16SKarl Rupp 
229e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
230e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
231e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
232e4a0ef16SKarl Rupp 
2334076e183SKarl Rupp     } catch(std::exception const & ex) {
2344076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
235e4a0ef16SKarl Rupp     }
236e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
23767c87b7fSKarl Rupp   }
238e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
239e4a0ef16SKarl Rupp }
240e4a0ef16SKarl Rupp 
241e4a0ef16SKarl Rupp #undef __FUNCT__
242e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
243e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
244e4a0ef16SKarl Rupp {
245e4a0ef16SKarl Rupp   PetscErrorCode ierr;
246e4a0ef16SKarl Rupp 
247e4a0ef16SKarl Rupp   PetscFunctionBegin;
248e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
249e4a0ef16SKarl Rupp   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
250e4a0ef16SKarl Rupp   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
251e4a0ef16SKarl Rupp   A->ops->mult    = MatMult_SeqAIJViennaCL;
252e4a0ef16SKarl Rupp   A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
253e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
254e4a0ef16SKarl Rupp }
255e4a0ef16SKarl Rupp 
256e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
257e4a0ef16SKarl Rupp #undef __FUNCT__
258e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL"
259e4a0ef16SKarl Rupp /*@
260e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
261*19fddfadSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
262e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
263e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
264e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
265e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
266e4a0ef16SKarl Rupp 
267e4a0ef16SKarl Rupp 
268e4a0ef16SKarl Rupp    Collective on MPI_Comm
269e4a0ef16SKarl Rupp 
270e4a0ef16SKarl Rupp    Input Parameters:
271e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
272e4a0ef16SKarl Rupp .  m - number of rows
273e4a0ef16SKarl Rupp .  n - number of columns
274e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
275e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
276e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
277e4a0ef16SKarl Rupp 
278e4a0ef16SKarl Rupp    Output Parameter:
279e4a0ef16SKarl Rupp .  A - the matrix
280e4a0ef16SKarl Rupp 
281e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
282e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
283e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
284e4a0ef16SKarl Rupp 
285e4a0ef16SKarl Rupp    Notes:
286e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
287e4a0ef16SKarl Rupp 
288e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
289e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
290e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
291e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
292e4a0ef16SKarl Rupp 
293e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
294e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
295e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
296e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
297e4a0ef16SKarl Rupp 
298e4a0ef16SKarl Rupp    Level: intermediate
299e4a0ef16SKarl Rupp 
300e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
301e4a0ef16SKarl Rupp 
302e4a0ef16SKarl Rupp @*/
303e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
304e4a0ef16SKarl Rupp {
305e4a0ef16SKarl Rupp   PetscErrorCode ierr;
306e4a0ef16SKarl Rupp 
307e4a0ef16SKarl Rupp   PetscFunctionBegin;
308e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
309e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
310e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
311e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
312e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
313e4a0ef16SKarl Rupp }
314e4a0ef16SKarl Rupp 
315e4a0ef16SKarl Rupp 
316e4a0ef16SKarl Rupp #undef __FUNCT__
317e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
318e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
319e4a0ef16SKarl Rupp {
320e4a0ef16SKarl Rupp   PetscErrorCode ierr;
321e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
322e4a0ef16SKarl Rupp 
323e4a0ef16SKarl Rupp   PetscFunctionBegin;
324e4a0ef16SKarl Rupp   try {
325e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
326e4a0ef16SKarl Rupp     delete viennaclcontainer;
327e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
3284076e183SKarl Rupp   } catch(std::exception const & ex) {
3294076e183SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
330e4a0ef16SKarl Rupp   }
331e4a0ef16SKarl Rupp   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
332e4a0ef16SKarl Rupp   A->spptr = 0;
333e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
334e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
335e4a0ef16SKarl Rupp }
336e4a0ef16SKarl Rupp 
337e4a0ef16SKarl Rupp 
338e4a0ef16SKarl Rupp #undef __FUNCT__
339e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL"
340e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
341e4a0ef16SKarl Rupp {
342e4a0ef16SKarl Rupp   PetscErrorCode ierr;
343e4a0ef16SKarl Rupp   Mat_SeqAIJ     *aij;
344e4a0ef16SKarl Rupp 
345e4a0ef16SKarl Rupp   PetscFunctionBegin;
346e4a0ef16SKarl Rupp   ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
347e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
348e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
349e4a0ef16SKarl Rupp   B->ops->mult    = MatMult_SeqAIJViennaCL;
350e4a0ef16SKarl Rupp   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
351e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
352e4a0ef16SKarl Rupp 
353e4a0ef16SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0;
354e4a0ef16SKarl Rupp 
355e4a0ef16SKarl Rupp   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
356e4a0ef16SKarl Rupp   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
357e4a0ef16SKarl Rupp   B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
358e4a0ef16SKarl Rupp 
359e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
360e4a0ef16SKarl Rupp 
361e4a0ef16SKarl Rupp   B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
362e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
363e4a0ef16SKarl Rupp }
364e4a0ef16SKarl Rupp 
365e4a0ef16SKarl Rupp 
366e4a0ef16SKarl Rupp /*M
367e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
368e4a0ef16SKarl Rupp 
369e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
370e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
371e4a0ef16SKarl Rupp 
372e4a0ef16SKarl Rupp    Options Database Keys:
373e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
374e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
375e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
376e4a0ef16SKarl Rupp 
377e4a0ef16SKarl Rupp   Level: beginner
378e4a0ef16SKarl Rupp 
379e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
380e4a0ef16SKarl Rupp M*/
381e4a0ef16SKarl Rupp 
382