xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 33d57670fcdbf57d9203d482728f549b81403a0e)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8aaa7dc30SBarry Smith #include <petscconf.h>
9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10aaa7dc30SBarry Smith #include <petscbt.h>
11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h>
12aaa7dc30SBarry Smith #include <petsc-private/vecimpl.h>
13e4a0ef16SKarl Rupp 
14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
23e4a0ef16SKarl Rupp #undef __FUNCT__
24e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyToGPU"
25e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26e4a0ef16SKarl Rupp {
27e4a0ef16SKarl Rupp 
28e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30e4a0ef16SKarl Rupp   PetscInt           *ii;
31e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
32e4a0ef16SKarl Rupp 
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp   PetscFunctionBegin;
3567c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
36e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
37e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38e4a0ef16SKarl Rupp 
39e4a0ef16SKarl Rupp       try {
401fc5b511SKarl Rupp         ierr = PetscObjectSetFromOptions_ViennaCL((PetscObject)A);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */
41e4a0ef16SKarl Rupp         viennaclstruct->mat = new ViennaCLAIJMatrix();
42e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
43e4a0ef16SKarl Rupp           ii = a->compressedrow.i;
44e4a0ef16SKarl Rupp 
45e4a0ef16SKarl Rupp           viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
46e4a0ef16SKarl Rupp 
47e4a0ef16SKarl Rupp           // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
48023073b3SKarl Rupp           //       Cannot be reasonably supported in ViennaCL 1.4.x (custom kernels required), hence postponing until there is support for compressed CSR
49e4a0ef16SKarl Rupp 
50e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
51e4a0ef16SKarl Rupp         } else {
52e4a0ef16SKarl Rupp 
53e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
54e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
55e4a0ef16SKarl Rupp 
56e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
57e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
58e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
59e4a0ef16SKarl Rupp 
60e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
61e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
62e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
63e4a0ef16SKarl Rupp 
64e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
65e4a0ef16SKarl Rupp         }
664cf1874eSKarl Rupp         ViennaCLWaitForGPU();
674076e183SKarl Rupp       } catch(std::exception const & ex) {
684076e183SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
69e4a0ef16SKarl Rupp       }
70e4a0ef16SKarl Rupp 
71e4a0ef16SKarl Rupp       A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
72e4a0ef16SKarl Rupp 
73e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
74e4a0ef16SKarl Rupp     }
7567c87b7fSKarl Rupp   }
76e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
77e4a0ef16SKarl Rupp }
78e4a0ef16SKarl Rupp 
79e4a0ef16SKarl Rupp #undef __FUNCT__
80e4a0ef16SKarl Rupp #define __FUNCT__ "MatViennaCLCopyFromGPU"
810d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
82e4a0ef16SKarl Rupp {
83e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
84e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
85e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
86e4a0ef16SKarl Rupp 
87e4a0ef16SKarl Rupp 
88e4a0ef16SKarl Rupp   PetscFunctionBegin;
89e4a0ef16SKarl Rupp   if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
90e4a0ef16SKarl Rupp     try {
91e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
92e4a0ef16SKarl Rupp         SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
93e4a0ef16SKarl Rupp       } else {
94e4a0ef16SKarl Rupp 
95e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
96e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
97e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
98e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
99e4a0ef16SKarl Rupp         if (a->singlemalloc) {
100e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
101e4a0ef16SKarl Rupp         } else {
102e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
103e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
104e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
105e4a0ef16SKarl Rupp         }
106dcca6d9dSJed Brown         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
107f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
108e4a0ef16SKarl Rupp 
109e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
110e4a0ef16SKarl Rupp 
111e4a0ef16SKarl Rupp         /* Setup row lengths */
112e4a0ef16SKarl Rupp         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
113dcca6d9dSJed Brown         ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr);
114f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
115e4a0ef16SKarl Rupp 
116e4a0ef16SKarl Rupp         /* Copy data back from GPU */
117e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
118e4a0ef16SKarl Rupp 
119e4a0ef16SKarl Rupp         // copy row array
120e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
121e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
122e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
123e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
124e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
125e4a0ef16SKarl Rupp         }
126e4a0ef16SKarl Rupp 
127e4a0ef16SKarl Rupp         // copy column indices
128e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
129e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
130e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
131e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
132e4a0ef16SKarl Rupp 
133e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
134e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
135e4a0ef16SKarl Rupp 
1364cf1874eSKarl Rupp         ViennaCLWaitForGPU();
137023073b3SKarl Rupp         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
138e4a0ef16SKarl Rupp       }
1394076e183SKarl Rupp     } catch(std::exception const & ex) {
1404076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
141e4a0ef16SKarl Rupp     }
142e4a0ef16SKarl Rupp 
143e4a0ef16SKarl Rupp     /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
144e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
145e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
146e4a0ef16SKarl Rupp 
147e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
148e4a0ef16SKarl Rupp   } else {
149e4a0ef16SKarl Rupp     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
150e4a0ef16SKarl Rupp   }
151e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
152e4a0ef16SKarl Rupp }
153e4a0ef16SKarl Rupp 
154e4a0ef16SKarl Rupp #undef __FUNCT__
155e4a0ef16SKarl Rupp #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
156e4a0ef16SKarl Rupp PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
157e4a0ef16SKarl Rupp {
158e4a0ef16SKarl Rupp   PetscErrorCode ierr;
159*33d57670SJed Brown   PetscInt rbs,cbs;
160e4a0ef16SKarl Rupp 
161e4a0ef16SKarl Rupp   PetscFunctionBegin;
162*33d57670SJed Brown   ierr = MatGetBlockSizes(mat,&rbs,&cbs);CHKERRQ(ierr);
163e4a0ef16SKarl Rupp   if (right) {
164e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
165e4a0ef16SKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
166*33d57670SJed Brown     ierr = VecSetBlockSize(*right,cbs);CHKERRQ(ierr);
167e4a0ef16SKarl Rupp     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
168e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
169e4a0ef16SKarl Rupp   }
170e4a0ef16SKarl Rupp   if (left) {
171e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
172e4a0ef16SKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
173*33d57670SJed Brown     ierr = VecSetBlockSize(*left,rbs);CHKERRQ(ierr);
174e4a0ef16SKarl Rupp     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
175e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
176e4a0ef16SKarl Rupp   }
177e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
178e4a0ef16SKarl Rupp }
179e4a0ef16SKarl Rupp 
180e4a0ef16SKarl Rupp #undef __FUNCT__
181e4a0ef16SKarl Rupp #define __FUNCT__ "MatMult_SeqAIJViennaCL"
182e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
183e4a0ef16SKarl Rupp {
184e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
185e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
186e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
1870d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL;
1880d73d530SKarl Rupp   ViennaCLVector       *ygpu=NULL;
189e4a0ef16SKarl Rupp 
190e4a0ef16SKarl Rupp   PetscFunctionBegin;
19167c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
192e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
193e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
194e4a0ef16SKarl Rupp     try {
195e4a0ef16SKarl Rupp       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
1964cf1874eSKarl Rupp       ViennaCLWaitForGPU();
1974076e183SKarl Rupp     } catch (std::exception const & ex) {
1984076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
199e4a0ef16SKarl Rupp     }
200e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
201e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
202e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
20367c87b7fSKarl Rupp   }
204e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
205e4a0ef16SKarl Rupp }
206e4a0ef16SKarl Rupp 
207e4a0ef16SKarl Rupp 
208e4a0ef16SKarl Rupp 
209e4a0ef16SKarl Rupp #undef __FUNCT__
210e4a0ef16SKarl Rupp #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
211e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
212e4a0ef16SKarl Rupp {
213e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
214e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
215e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
2160d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
2170d73d530SKarl Rupp   ViennaCLVector       *zgpu=NULL;
218e4a0ef16SKarl Rupp 
219e4a0ef16SKarl Rupp   PetscFunctionBegin;
22067c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
221e4a0ef16SKarl Rupp     try {
222e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
223e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
224e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
225e4a0ef16SKarl Rupp 
226e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
227e4a0ef16SKarl Rupp           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
228e4a0ef16SKarl Rupp       } else {
229e4a0ef16SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
230e4a0ef16SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
231e4a0ef16SKarl Rupp           *zgpu = *ygpu + temp;
2324cf1874eSKarl Rupp           ViennaCLWaitForGPU();
233e4a0ef16SKarl Rupp         } else {
234e4a0ef16SKarl Rupp           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
235e4a0ef16SKarl Rupp           *zgpu += *ygpu;
2364cf1874eSKarl Rupp           ViennaCLWaitForGPU();
237e4a0ef16SKarl Rupp         }
238e4a0ef16SKarl Rupp       }
239e4a0ef16SKarl Rupp 
240e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
241e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
242e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
243e4a0ef16SKarl Rupp 
2444076e183SKarl Rupp     } catch(std::exception const & ex) {
2454076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
246e4a0ef16SKarl Rupp     }
247e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
24867c87b7fSKarl Rupp   }
249e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
250e4a0ef16SKarl Rupp }
251e4a0ef16SKarl Rupp 
252e4a0ef16SKarl Rupp #undef __FUNCT__
253e4a0ef16SKarl Rupp #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
254e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
255e4a0ef16SKarl Rupp {
256e4a0ef16SKarl Rupp   PetscErrorCode ierr;
257e4a0ef16SKarl Rupp 
258e4a0ef16SKarl Rupp   PetscFunctionBegin;
259e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
260e4a0ef16SKarl Rupp   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
261e4a0ef16SKarl Rupp   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
262e4a0ef16SKarl Rupp   A->ops->mult    = MatMult_SeqAIJViennaCL;
263e4a0ef16SKarl Rupp   A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
264e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
265e4a0ef16SKarl Rupp }
266e4a0ef16SKarl Rupp 
267e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
268e4a0ef16SKarl Rupp #undef __FUNCT__
269e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreateSeqAIJViennaCL"
270e4a0ef16SKarl Rupp /*@
271e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
27219fddfadSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
273e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
274e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
275e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
276e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
277e4a0ef16SKarl Rupp 
278e4a0ef16SKarl Rupp 
279e4a0ef16SKarl Rupp    Collective on MPI_Comm
280e4a0ef16SKarl Rupp 
281e4a0ef16SKarl Rupp    Input Parameters:
282e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
283e4a0ef16SKarl Rupp .  m - number of rows
284e4a0ef16SKarl Rupp .  n - number of columns
285e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
286e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
287e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
288e4a0ef16SKarl Rupp 
289e4a0ef16SKarl Rupp    Output Parameter:
290e4a0ef16SKarl Rupp .  A - the matrix
291e4a0ef16SKarl Rupp 
292e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
293e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
294e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
295e4a0ef16SKarl Rupp 
296e4a0ef16SKarl Rupp    Notes:
297e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
298e4a0ef16SKarl Rupp 
299e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
300e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
301e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
302e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
303e4a0ef16SKarl Rupp 
304e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
305e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
306e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
307e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
308e4a0ef16SKarl Rupp 
309e4a0ef16SKarl Rupp    Level: intermediate
310e4a0ef16SKarl Rupp 
311e4a0ef16SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
312e4a0ef16SKarl Rupp 
313e4a0ef16SKarl Rupp @*/
314e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
315e4a0ef16SKarl Rupp {
316e4a0ef16SKarl Rupp   PetscErrorCode ierr;
317e4a0ef16SKarl Rupp 
318e4a0ef16SKarl Rupp   PetscFunctionBegin;
319e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
320e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
321e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
322e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
323e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
324e4a0ef16SKarl Rupp }
325e4a0ef16SKarl Rupp 
326e4a0ef16SKarl Rupp 
327e4a0ef16SKarl Rupp #undef __FUNCT__
328e4a0ef16SKarl Rupp #define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
329e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
330e4a0ef16SKarl Rupp {
331e4a0ef16SKarl Rupp   PetscErrorCode ierr;
332e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
333e4a0ef16SKarl Rupp 
334e4a0ef16SKarl Rupp   PetscFunctionBegin;
335e4a0ef16SKarl Rupp   try {
336e4a0ef16SKarl Rupp     if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
337e4a0ef16SKarl Rupp     delete viennaclcontainer;
338e4a0ef16SKarl Rupp     A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
3394076e183SKarl Rupp   } catch(std::exception const & ex) {
3404076e183SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
341e4a0ef16SKarl Rupp   }
342e4a0ef16SKarl Rupp   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
343e4a0ef16SKarl Rupp   A->spptr = 0;
344e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
345e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
346e4a0ef16SKarl Rupp }
347e4a0ef16SKarl Rupp 
348e4a0ef16SKarl Rupp 
349e4a0ef16SKarl Rupp #undef __FUNCT__
350e4a0ef16SKarl Rupp #define __FUNCT__ "MatCreate_SeqAIJViennaCL"
351e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
352e4a0ef16SKarl Rupp {
353e4a0ef16SKarl Rupp   PetscErrorCode ierr;
354e4a0ef16SKarl Rupp   Mat_SeqAIJ     *aij;
355e4a0ef16SKarl Rupp 
356e4a0ef16SKarl Rupp   PetscFunctionBegin;
357e4a0ef16SKarl Rupp   ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
358e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
359e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
360e4a0ef16SKarl Rupp   B->ops->mult    = MatMult_SeqAIJViennaCL;
361e4a0ef16SKarl Rupp   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
362e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
363e4a0ef16SKarl Rupp 
364e4a0ef16SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0;
365e4a0ef16SKarl Rupp 
366e4a0ef16SKarl Rupp   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
367e4a0ef16SKarl Rupp   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
368e4a0ef16SKarl Rupp   B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
369e4a0ef16SKarl Rupp 
370e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
371e4a0ef16SKarl Rupp 
372e4a0ef16SKarl Rupp   B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
373e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
374e4a0ef16SKarl Rupp }
375e4a0ef16SKarl Rupp 
376e4a0ef16SKarl Rupp 
377e4a0ef16SKarl Rupp /*M
378e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
379e4a0ef16SKarl Rupp 
380e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
381e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
382e4a0ef16SKarl Rupp 
383e4a0ef16SKarl Rupp    Options Database Keys:
384e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
385e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
386e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
387e4a0ef16SKarl Rupp 
388e4a0ef16SKarl Rupp   Level: beginner
389e4a0ef16SKarl Rupp 
390e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
391e4a0ef16SKarl Rupp M*/
392e4a0ef16SKarl Rupp 
393