xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision 071fcb05f2a6aea8aef2c2090580530b9e9df77e)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8aaa7dc30SBarry Smith #include <petscconf.h>
9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10aaa7dc30SBarry Smith #include <petscbt.h>
11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h>
12af0996ceSBarry Smith #include <petsc/private/vecimpl.h>
13e4a0ef16SKarl Rupp 
14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
238713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
2472367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
2572367587SKarl Rupp 
268713a8baSPatrick Sanan 
27e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28e4a0ef16SKarl Rupp {
29e4a0ef16SKarl Rupp 
30e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp   PetscFunctionBegin;
35bf1781e8SStefano Zampini   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { //some OpenCL SDKs have issues with buffers of size 0
36b8ced49eSKarl Rupp     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
37e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38e4a0ef16SKarl Rupp 
39e4a0ef16SKarl Rupp       try {
40e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
41a3430c56SKarl Rupp           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
42e4a0ef16SKarl Rupp 
43a3430c56SKarl Rupp           // Since PetscInt is different from cl_uint, we have to convert:
44a3430c56SKarl Rupp           viennacl::backend::mem_handle dummy;
45e4a0ef16SKarl Rupp 
46a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
47a3430c56SKarl Rupp           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
48a3430c56SKarl Rupp             row_buffer.set(i, (a->compressedrow.i)[i]);
49e4a0ef16SKarl Rupp 
50a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
51a3430c56SKarl Rupp           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
52a3430c56SKarl Rupp             row_indices.set(i, (a->compressedrow.rindex)[i]);
53a3430c56SKarl Rupp 
54a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
55a3430c56SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
56a3430c56SKarl Rupp             col_buffer.set(i, (a->j)[i]);
57a3430c56SKarl Rupp 
58a3430c56SKarl Rupp           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
59e4a0ef16SKarl Rupp         } else {
60a3430c56SKarl Rupp           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
61e4a0ef16SKarl Rupp 
62e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
63e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
64e4a0ef16SKarl Rupp 
65e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
66e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
67e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
68e4a0ef16SKarl Rupp 
69e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
70e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
71e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
72e4a0ef16SKarl Rupp 
73e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
74e4a0ef16SKarl Rupp         }
754cf1874eSKarl Rupp         ViennaCLWaitForGPU();
764076e183SKarl Rupp       } catch(std::exception const & ex) {
774076e183SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
78e4a0ef16SKarl Rupp       }
79e4a0ef16SKarl Rupp 
80a3430c56SKarl Rupp       // Create temporary vector for v += A*x:
81a3430c56SKarl Rupp       if (viennaclstruct->tempvec) {
829b66742cSDave May         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
83a3430c56SKarl Rupp           delete (ViennaCLVector*)viennaclstruct->tempvec;
849b66742cSDave May           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
85a3430c56SKarl Rupp         } else {
86a3430c56SKarl Rupp           viennaclstruct->tempvec->clear();
87a3430c56SKarl Rupp         }
88a3430c56SKarl Rupp       } else {
899b66742cSDave May         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
90a3430c56SKarl Rupp       }
91a3430c56SKarl Rupp 
92b8ced49eSKarl Rupp       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
93e4a0ef16SKarl Rupp 
94e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
95e4a0ef16SKarl Rupp     }
9667c87b7fSKarl Rupp   }
97e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
98e4a0ef16SKarl Rupp }
99e4a0ef16SKarl Rupp 
1000d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
101e4a0ef16SKarl Rupp {
102e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
103e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
104e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
105e4a0ef16SKarl Rupp 
106e4a0ef16SKarl Rupp 
107e4a0ef16SKarl Rupp   PetscFunctionBegin;
108b8ced49eSKarl Rupp   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
109e4a0ef16SKarl Rupp     try {
1106c4ed002SBarry Smith       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
1116c4ed002SBarry Smith       else {
112e4a0ef16SKarl Rupp 
113e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
114e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
115e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
116e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
117e4a0ef16SKarl Rupp         if (a->singlemalloc) {
118e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
119e4a0ef16SKarl Rupp         } else {
120e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
121e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
122e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
123e4a0ef16SKarl Rupp         }
124dcca6d9dSJed Brown         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
125f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
126e4a0ef16SKarl Rupp 
127e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
128e4a0ef16SKarl Rupp 
129e4a0ef16SKarl Rupp         /* Setup row lengths */
130*071fcb05SBarry Smith         ierr = PetscFree(a->imax);CHKERRQ(ierr);
131*071fcb05SBarry Smith         ierr = PetscFree(a->ilen);CHKERRQ(ierr);
132*071fcb05SBarry Smith         ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
133*071fcb05SBarry Smith         ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
134f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
135e4a0ef16SKarl Rupp 
136e4a0ef16SKarl Rupp         /* Copy data back from GPU */
137e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
138e4a0ef16SKarl Rupp 
139e4a0ef16SKarl Rupp         // copy row array
140e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
141e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
142e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
143e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
144e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
145e4a0ef16SKarl Rupp         }
146e4a0ef16SKarl Rupp 
147e4a0ef16SKarl Rupp         // copy column indices
148e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
149e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
150e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
151e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
152e4a0ef16SKarl Rupp 
153e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
154e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
155e4a0ef16SKarl Rupp 
1564cf1874eSKarl Rupp         ViennaCLWaitForGPU();
157023073b3SKarl Rupp         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
158e4a0ef16SKarl Rupp       }
1594076e183SKarl Rupp     } catch(std::exception const & ex) {
1604076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
161e4a0ef16SKarl Rupp     }
162e4a0ef16SKarl Rupp 
163b8ced49eSKarl Rupp     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
164e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166e4a0ef16SKarl Rupp 
167b8ced49eSKarl Rupp     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
1686c4ed002SBarry Smith   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
169e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
170e4a0ef16SKarl Rupp }
171e4a0ef16SKarl Rupp 
172e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
173e4a0ef16SKarl Rupp {
174e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
175e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
176e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
1770d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL;
1780d73d530SKarl Rupp   ViennaCLVector       *ygpu=NULL;
179e4a0ef16SKarl Rupp 
180e4a0ef16SKarl Rupp   PetscFunctionBegin;
181bf1781e8SStefano Zampini   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
182e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
183e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
184e4a0ef16SKarl Rupp     try {
185b7832b47SStefano Zampini       if (a->compressedrow.use) {
186b7832b47SStefano Zampini         *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
187b7832b47SStefano Zampini       } else {
188e4a0ef16SKarl Rupp         *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
189b7832b47SStefano Zampini       }
1904cf1874eSKarl Rupp       ViennaCLWaitForGPU();
1914076e183SKarl Rupp     } catch (std::exception const & ex) {
1924076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
193e4a0ef16SKarl Rupp     }
194e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
195e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
1969b66742cSDave May     ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
197bf1781e8SStefano Zampini   } else {
198bf1781e8SStefano Zampini     ierr = VecSet(yy,0);CHKERRQ(ierr);
19967c87b7fSKarl Rupp   }
200e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
201e4a0ef16SKarl Rupp }
202e4a0ef16SKarl Rupp 
203e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
204e4a0ef16SKarl Rupp {
205e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
206e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
207e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
2080d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
2090d73d530SKarl Rupp   ViennaCLVector       *zgpu=NULL;
210e4a0ef16SKarl Rupp 
211e4a0ef16SKarl Rupp   PetscFunctionBegin;
212bf1781e8SStefano Zampini   if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
213e4a0ef16SKarl Rupp     try {
214e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
215e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
216e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
217e4a0ef16SKarl Rupp 
218e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
219a3430c56SKarl Rupp         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
220e4a0ef16SKarl Rupp         *zgpu = *ygpu + temp;
2214cf1874eSKarl Rupp         ViennaCLWaitForGPU();
222e4a0ef16SKarl Rupp       } else {
223a3430c56SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
224a3430c56SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
225a3430c56SKarl Rupp           *zgpu = *ygpu;
226a3430c56SKarl Rupp           *zgpu += temp;
227a3430c56SKarl Rupp           ViennaCLWaitForGPU();
228a3430c56SKarl Rupp         } else {
229a3430c56SKarl Rupp           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
230a3430c56SKarl Rupp           *zgpu = *ygpu + *viennaclstruct->tempvec;
2314cf1874eSKarl Rupp           ViennaCLWaitForGPU();
232e4a0ef16SKarl Rupp         }
233e4a0ef16SKarl Rupp       }
234e4a0ef16SKarl Rupp 
235e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
236e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
237e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
238e4a0ef16SKarl Rupp 
2394076e183SKarl Rupp     } catch(std::exception const & ex) {
2404076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
241e4a0ef16SKarl Rupp     }
242e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
243bf1781e8SStefano Zampini   } else {
244bf1781e8SStefano Zampini     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
24567c87b7fSKarl Rupp   }
246e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
247e4a0ef16SKarl Rupp }
248e4a0ef16SKarl Rupp 
249e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
250e4a0ef16SKarl Rupp {
251e4a0ef16SKarl Rupp   PetscErrorCode ierr;
252e4a0ef16SKarl Rupp 
253e4a0ef16SKarl Rupp   PetscFunctionBegin;
254e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
255e7e92044SBarry Smith   if (!A->pinnedtocpu) {
256e4a0ef16SKarl Rupp     ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
257e7e92044SBarry Smith   }
258e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
259e4a0ef16SKarl Rupp }
260e4a0ef16SKarl Rupp 
261e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
262e4a0ef16SKarl Rupp /*@
263e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
26419fddfadSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
265e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
266e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
267e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
268e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
269e4a0ef16SKarl Rupp 
270e4a0ef16SKarl Rupp 
271d083f849SBarry Smith    Collective
272e4a0ef16SKarl Rupp 
273e4a0ef16SKarl Rupp    Input Parameters:
274e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
275e4a0ef16SKarl Rupp .  m - number of rows
276e4a0ef16SKarl Rupp .  n - number of columns
277e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
278e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
279e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
280e4a0ef16SKarl Rupp 
281e4a0ef16SKarl Rupp    Output Parameter:
282e4a0ef16SKarl Rupp .  A - the matrix
283e4a0ef16SKarl Rupp 
284e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
285e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
286e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
287e4a0ef16SKarl Rupp 
288e4a0ef16SKarl Rupp    Notes:
289e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
290e4a0ef16SKarl Rupp 
291e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
292e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
293e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
294e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
295e4a0ef16SKarl Rupp 
296e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
297e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
298e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
299e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
300e4a0ef16SKarl Rupp 
301e4a0ef16SKarl Rupp    Level: intermediate
302e4a0ef16SKarl Rupp 
303e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
304e4a0ef16SKarl Rupp 
305e4a0ef16SKarl Rupp @*/
306e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
307e4a0ef16SKarl Rupp {
308e4a0ef16SKarl Rupp   PetscErrorCode ierr;
309e4a0ef16SKarl Rupp 
310e4a0ef16SKarl Rupp   PetscFunctionBegin;
311e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
312e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
313e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
314e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
315e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
316e4a0ef16SKarl Rupp }
317e4a0ef16SKarl Rupp 
318e4a0ef16SKarl Rupp 
319e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
320e4a0ef16SKarl Rupp {
321e4a0ef16SKarl Rupp   PetscErrorCode ierr;
322e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
323e4a0ef16SKarl Rupp 
324e4a0ef16SKarl Rupp   PetscFunctionBegin;
325e4a0ef16SKarl Rupp   try {
3266447cd05SKarl Rupp     if (viennaclcontainer) {
3276447cd05SKarl Rupp       delete viennaclcontainer->tempvec;
3286447cd05SKarl Rupp       delete viennaclcontainer->mat;
3296447cd05SKarl Rupp       delete viennaclcontainer->compressed_mat;
330e4a0ef16SKarl Rupp       delete viennaclcontainer;
3316447cd05SKarl Rupp     }
332b8ced49eSKarl Rupp     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
3334076e183SKarl Rupp   } catch(std::exception const & ex) {
3344076e183SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
335e4a0ef16SKarl Rupp   }
3368713a8baSPatrick Sanan 
3378713a8baSPatrick Sanan   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
3388713a8baSPatrick Sanan 
339e4a0ef16SKarl Rupp   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
340e4a0ef16SKarl Rupp   A->spptr = 0;
341e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
342e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
343e4a0ef16SKarl Rupp }
344e4a0ef16SKarl Rupp 
345e4a0ef16SKarl Rupp 
346e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
347e4a0ef16SKarl Rupp {
348e4a0ef16SKarl Rupp   PetscErrorCode ierr;
349e4a0ef16SKarl Rupp 
350e4a0ef16SKarl Rupp   PetscFunctionBegin;
351e4a0ef16SKarl Rupp   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
3528713a8baSPatrick Sanan   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
3538713a8baSPatrick Sanan   PetscFunctionReturn(0);
3548713a8baSPatrick Sanan }
3558713a8baSPatrick Sanan 
356e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat,PetscBool);
357c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
358c3cca76eSKarl Rupp {
359c3cca76eSKarl Rupp   PetscErrorCode ierr;
360c3cca76eSKarl Rupp   Mat            C;
361c3cca76eSKarl Rupp 
362c3cca76eSKarl Rupp   PetscFunctionBegin;
363c3cca76eSKarl Rupp   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
364c3cca76eSKarl Rupp   C = *B;
365c3cca76eSKarl Rupp 
366e7e92044SBarry Smith   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
367e7e92044SBarry Smith   C->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
368c3cca76eSKarl Rupp 
369c3cca76eSKarl Rupp   C->spptr        = new Mat_SeqAIJViennaCL();
370c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
371c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
372c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
373c3cca76eSKarl Rupp 
374c3cca76eSKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
375c3cca76eSKarl Rupp 
376b8ced49eSKarl Rupp   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
377c3cca76eSKarl Rupp 
378c3cca76eSKarl Rupp   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
379c3cca76eSKarl Rupp   if (C->assembled) {
380c3cca76eSKarl Rupp     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
381c3cca76eSKarl Rupp   }
382c3cca76eSKarl Rupp 
383c3cca76eSKarl Rupp   PetscFunctionReturn(0);
384c3cca76eSKarl Rupp }
385c3cca76eSKarl Rupp 
386e7e92044SBarry Smith static PetscErrorCode MatPinToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
387e7e92044SBarry Smith {
388e7e92044SBarry Smith   PetscFunctionBegin;
389e7e92044SBarry Smith   A->pinnedtocpu = flg;
390e7e92044SBarry Smith   if (flg) {
391e7e92044SBarry Smith     A->ops->mult           = MatMult_SeqAIJ;
392e7e92044SBarry Smith     A->ops->multadd        = MatMultAdd_SeqAIJ;
393e7e92044SBarry Smith     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJ;
394e7e92044SBarry Smith     A->ops->destroy        = MatDestroy_SeqAIJ;
395e7e92044SBarry Smith     A->ops->duplicate      = MatDuplicate_SeqAIJ;
396e7e92044SBarry Smith   } else {
397e7e92044SBarry Smith     A->ops->mult           = MatMult_SeqAIJViennaCL;
398e7e92044SBarry Smith     A->ops->multadd        = MatMultAdd_SeqAIJViennaCL;
399e7e92044SBarry Smith     A->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
400e7e92044SBarry Smith     A->ops->destroy        = MatDestroy_SeqAIJViennaCL;
401e7e92044SBarry Smith     A->ops->duplicate      = MatDuplicate_SeqAIJViennaCL;
402e7e92044SBarry Smith   }
403e7e92044SBarry Smith   PetscFunctionReturn(0);
404e7e92044SBarry Smith }
405e7e92044SBarry Smith 
4068713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
4078713a8baSPatrick Sanan {
4088713a8baSPatrick Sanan   PetscErrorCode ierr;
4098713a8baSPatrick Sanan   Mat            B;
4108713a8baSPatrick Sanan   Mat_SeqAIJ     *aij;
4118713a8baSPatrick Sanan 
4128713a8baSPatrick Sanan   PetscFunctionBegin;
4138713a8baSPatrick Sanan 
4148713a8baSPatrick Sanan   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
4158713a8baSPatrick Sanan 
4168713a8baSPatrick Sanan   if (reuse == MAT_INITIAL_MATRIX) {
4178713a8baSPatrick Sanan     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
4188713a8baSPatrick Sanan   }
4198713a8baSPatrick Sanan 
4208713a8baSPatrick Sanan   B = *newmat;
4218713a8baSPatrick Sanan 
422e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
423e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
4248713a8baSPatrick Sanan 
425e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
426e4a0ef16SKarl Rupp 
427a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
428a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
429a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
430e4a0ef16SKarl Rupp 
431e7e92044SBarry Smith   ierr = MatPinToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
432e7e92044SBarry Smith   A->ops->pintocpu = MatPinToCPU_SeqAIJViennaCL;
433e4a0ef16SKarl Rupp 
434e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
43534136279SStefano Zampini   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
43634136279SStefano Zampini   ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);
437e4a0ef16SKarl Rupp 
4388713a8baSPatrick Sanan   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
4398713a8baSPatrick Sanan 
440b8ced49eSKarl Rupp   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
4418713a8baSPatrick Sanan 
4428713a8baSPatrick Sanan   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
4438713a8baSPatrick Sanan   if (B->assembled) {
4448713a8baSPatrick Sanan     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
4458713a8baSPatrick Sanan   }
4468713a8baSPatrick Sanan 
447e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
448e4a0ef16SKarl Rupp }
449e4a0ef16SKarl Rupp 
450e4a0ef16SKarl Rupp 
4513ca39a21SBarry Smith /*MC
452e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
453e4a0ef16SKarl Rupp 
454e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
455e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
456e4a0ef16SKarl Rupp 
457e4a0ef16SKarl Rupp    Options Database Keys:
458e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
459e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
460e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
461e4a0ef16SKarl Rupp 
462e4a0ef16SKarl Rupp   Level: beginner
463e4a0ef16SKarl Rupp 
464e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
465e4a0ef16SKarl Rupp M*/
466e4a0ef16SKarl Rupp 
46772367587SKarl Rupp 
4683ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
46972367587SKarl Rupp {
47072367587SKarl Rupp   PetscErrorCode ierr;
47172367587SKarl Rupp 
47272367587SKarl Rupp   PetscFunctionBegin;
4733ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4743ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4753ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4763ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
47772367587SKarl Rupp   PetscFunctionReturn(0);
47872367587SKarl Rupp }
479