xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision b8ced49e4e0eb4a309ccfcd29d1e92cf1cda6377)
1e4a0ef16SKarl Rupp 
2e4a0ef16SKarl Rupp 
3e4a0ef16SKarl Rupp /*
4e4a0ef16SKarl Rupp     Defines the basic matrix operations for the AIJ (compressed row)
5e4a0ef16SKarl Rupp   matrix storage format.
6e4a0ef16SKarl Rupp */
7e4a0ef16SKarl Rupp 
8aaa7dc30SBarry Smith #include <petscconf.h>
9aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/aij.h>          /*I "petscmat.h" I*/
10aaa7dc30SBarry Smith #include <petscbt.h>
11aaa7dc30SBarry Smith #include <../src/vec/vec/impls/dvecimpl.h>
12af0996ceSBarry Smith #include <petsc/private/vecimpl.h>
13e4a0ef16SKarl Rupp 
14aaa7dc30SBarry Smith #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>
15e4a0ef16SKarl Rupp 
16e4a0ef16SKarl Rupp 
17e4a0ef16SKarl Rupp #include <algorithm>
18e4a0ef16SKarl Rupp #include <vector>
19e4a0ef16SKarl Rupp #include <string>
20e4a0ef16SKarl Rupp 
21e4a0ef16SKarl Rupp #include "viennacl/linalg/prod.hpp"
22e4a0ef16SKarl Rupp 
238713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
2472367587SKarl Rupp PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
2572367587SKarl Rupp 
268713a8baSPatrick Sanan 
27e4a0ef16SKarl Rupp PetscErrorCode MatViennaCLCopyToGPU(Mat A)
28e4a0ef16SKarl Rupp {
29e4a0ef16SKarl Rupp 
30e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
31e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
32e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
33e4a0ef16SKarl Rupp 
34e4a0ef16SKarl Rupp 
35e4a0ef16SKarl Rupp   PetscFunctionBegin;
3667c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
37*b8ced49eSKarl Rupp     if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED || A->valid_GPU_matrix == PETSC_OFFLOAD_CPU) {
38e4a0ef16SKarl Rupp       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
39e4a0ef16SKarl Rupp 
40e4a0ef16SKarl Rupp       try {
41e4a0ef16SKarl Rupp         if (a->compressedrow.use) {
42a3430c56SKarl Rupp           if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();
43e4a0ef16SKarl Rupp 
44a3430c56SKarl Rupp           // Since PetscInt is different from cl_uint, we have to convert:
45a3430c56SKarl Rupp           viennacl::backend::mem_handle dummy;
46e4a0ef16SKarl Rupp 
47a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
48a3430c56SKarl Rupp           for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
49a3430c56SKarl Rupp             row_buffer.set(i, (a->compressedrow.i)[i]);
50e4a0ef16SKarl Rupp 
51a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
52a3430c56SKarl Rupp           for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
53a3430c56SKarl Rupp             row_indices.set(i, (a->compressedrow.rindex)[i]);
54a3430c56SKarl Rupp 
55a3430c56SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
56a3430c56SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
57a3430c56SKarl Rupp             col_buffer.set(i, (a->j)[i]);
58a3430c56SKarl Rupp 
59a3430c56SKarl Rupp           viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
60e4a0ef16SKarl Rupp         } else {
61a3430c56SKarl Rupp           if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();
62e4a0ef16SKarl Rupp 
63e4a0ef16SKarl Rupp           // Since PetscInt is in general different from cl_uint, we have to convert:
64e4a0ef16SKarl Rupp           viennacl::backend::mem_handle dummy;
65e4a0ef16SKarl Rupp 
66e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
67e4a0ef16SKarl Rupp           for (PetscInt i=0; i<=A->rmap->n; ++i)
68e4a0ef16SKarl Rupp             row_buffer.set(i, (a->i)[i]);
69e4a0ef16SKarl Rupp 
70e4a0ef16SKarl Rupp           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
71e4a0ef16SKarl Rupp           for (PetscInt i=0; i<a->nz; ++i)
72e4a0ef16SKarl Rupp             col_buffer.set(i, (a->j)[i]);
73e4a0ef16SKarl Rupp 
74e4a0ef16SKarl Rupp           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
75e4a0ef16SKarl Rupp         }
764cf1874eSKarl Rupp         ViennaCLWaitForGPU();
774076e183SKarl Rupp       } catch(std::exception const & ex) {
784076e183SKarl Rupp         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
79e4a0ef16SKarl Rupp       }
80e4a0ef16SKarl Rupp 
81a3430c56SKarl Rupp       // Create temporary vector for v += A*x:
82a3430c56SKarl Rupp       if (viennaclstruct->tempvec) {
839b66742cSDave May         if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
84a3430c56SKarl Rupp           delete (ViennaCLVector*)viennaclstruct->tempvec;
859b66742cSDave May           viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
86a3430c56SKarl Rupp         } else {
87a3430c56SKarl Rupp           viennaclstruct->tempvec->clear();
88a3430c56SKarl Rupp         }
89a3430c56SKarl Rupp       } else {
909b66742cSDave May         viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
91a3430c56SKarl Rupp       }
92a3430c56SKarl Rupp 
93*b8ced49eSKarl Rupp       A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
94e4a0ef16SKarl Rupp 
95e4a0ef16SKarl Rupp       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
96e4a0ef16SKarl Rupp     }
9767c87b7fSKarl Rupp   }
98e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
99e4a0ef16SKarl Rupp }
100e4a0ef16SKarl Rupp 
1010d73d530SKarl Rupp PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
102e4a0ef16SKarl Rupp {
103e4a0ef16SKarl Rupp   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
104e4a0ef16SKarl Rupp   PetscInt           m               = A->rmap->n;
105e4a0ef16SKarl Rupp   PetscErrorCode     ierr;
106e4a0ef16SKarl Rupp 
107e4a0ef16SKarl Rupp 
108e4a0ef16SKarl Rupp   PetscFunctionBegin;
109*b8ced49eSKarl Rupp   if (A->valid_GPU_matrix == PETSC_OFFLOAD_UNALLOCATED) {
110e4a0ef16SKarl Rupp     try {
1116c4ed002SBarry Smith       if (a->compressedrow.use) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
1126c4ed002SBarry Smith       else {
113e4a0ef16SKarl Rupp 
114e4a0ef16SKarl Rupp         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
115e4a0ef16SKarl Rupp         a->nz           = Agpu->nnz();
116e4a0ef16SKarl Rupp         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
117e4a0ef16SKarl Rupp         A->preallocated = PETSC_TRUE;
118e4a0ef16SKarl Rupp         if (a->singlemalloc) {
119e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
120e4a0ef16SKarl Rupp         } else {
121e4a0ef16SKarl Rupp           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
122e4a0ef16SKarl Rupp           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
123e4a0ef16SKarl Rupp           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
124e4a0ef16SKarl Rupp         }
125dcca6d9dSJed Brown         ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
126f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
127e4a0ef16SKarl Rupp 
128e4a0ef16SKarl Rupp         a->singlemalloc = PETSC_TRUE;
129e4a0ef16SKarl Rupp 
130e4a0ef16SKarl Rupp         /* Setup row lengths */
131e4a0ef16SKarl Rupp         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
132dcca6d9dSJed Brown         ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr);
133f7daeb2aSKarl Rupp         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
134e4a0ef16SKarl Rupp 
135e4a0ef16SKarl Rupp         /* Copy data back from GPU */
136e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
137e4a0ef16SKarl Rupp 
138e4a0ef16SKarl Rupp         // copy row array
139e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
140e4a0ef16SKarl Rupp         (a->i)[0] = row_buffer[0];
141e4a0ef16SKarl Rupp         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
142e4a0ef16SKarl Rupp           (a->i)[i+1] = row_buffer[i+1];
143e4a0ef16SKarl Rupp           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
144e4a0ef16SKarl Rupp         }
145e4a0ef16SKarl Rupp 
146e4a0ef16SKarl Rupp         // copy column indices
147e4a0ef16SKarl Rupp         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
148e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
149e4a0ef16SKarl Rupp         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
150e4a0ef16SKarl Rupp           (a->j)[i] = col_buffer[i];
151e4a0ef16SKarl Rupp 
152e4a0ef16SKarl Rupp         // copy nonzero entries directly to destination (no conversion required)
153e4a0ef16SKarl Rupp         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
154e4a0ef16SKarl Rupp 
1554cf1874eSKarl Rupp         ViennaCLWaitForGPU();
156023073b3SKarl Rupp         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
157e4a0ef16SKarl Rupp       }
1584076e183SKarl Rupp     } catch(std::exception const & ex) {
1594076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
160e4a0ef16SKarl Rupp     }
161e4a0ef16SKarl Rupp 
162*b8ced49eSKarl Rupp     /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
163e4a0ef16SKarl Rupp     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164e4a0ef16SKarl Rupp     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165e4a0ef16SKarl Rupp 
166*b8ced49eSKarl Rupp     A->valid_GPU_matrix = PETSC_OFFLOAD_BOTH;
1676c4ed002SBarry Smith   } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
168e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
169e4a0ef16SKarl Rupp }
170e4a0ef16SKarl Rupp 
1712a7a6963SBarry Smith PetscErrorCode MatCreateVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
172e4a0ef16SKarl Rupp {
173e4a0ef16SKarl Rupp   PetscErrorCode ierr;
17433d57670SJed Brown   PetscInt rbs,cbs;
175e4a0ef16SKarl Rupp 
176e4a0ef16SKarl Rupp   PetscFunctionBegin;
17733d57670SJed Brown   ierr = MatGetBlockSizes(mat,&rbs,&cbs);CHKERRQ(ierr);
178e4a0ef16SKarl Rupp   if (right) {
179e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
180e4a0ef16SKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
18133d57670SJed Brown     ierr = VecSetBlockSize(*right,cbs);CHKERRQ(ierr);
182e4a0ef16SKarl Rupp     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
183e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
184e4a0ef16SKarl Rupp   }
185e4a0ef16SKarl Rupp   if (left) {
186e4a0ef16SKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
187e4a0ef16SKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
18833d57670SJed Brown     ierr = VecSetBlockSize(*left,rbs);CHKERRQ(ierr);
189e4a0ef16SKarl Rupp     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
190e4a0ef16SKarl Rupp     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
191e4a0ef16SKarl Rupp   }
192e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
193e4a0ef16SKarl Rupp }
194e4a0ef16SKarl Rupp 
195e4a0ef16SKarl Rupp PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
196e4a0ef16SKarl Rupp {
197e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
198e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
199e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
2000d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL;
2010d73d530SKarl Rupp   ViennaCLVector       *ygpu=NULL;
202e4a0ef16SKarl Rupp 
203e4a0ef16SKarl Rupp   PetscFunctionBegin;
20467c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
205e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
206e4a0ef16SKarl Rupp     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
207e4a0ef16SKarl Rupp     try {
208e4a0ef16SKarl Rupp       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
2094cf1874eSKarl Rupp       ViennaCLWaitForGPU();
2104076e183SKarl Rupp     } catch (std::exception const & ex) {
2114076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
212e4a0ef16SKarl Rupp     }
213e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
214e4a0ef16SKarl Rupp     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
2159b66742cSDave May     ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
21667c87b7fSKarl Rupp   }
217e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
218e4a0ef16SKarl Rupp }
219e4a0ef16SKarl Rupp 
220e4a0ef16SKarl Rupp 
221e4a0ef16SKarl Rupp 
222e4a0ef16SKarl Rupp PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
223e4a0ef16SKarl Rupp {
224e4a0ef16SKarl Rupp   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
225e4a0ef16SKarl Rupp   PetscErrorCode       ierr;
226e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
2270d73d530SKarl Rupp   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
2280d73d530SKarl Rupp   ViennaCLVector       *zgpu=NULL;
229e4a0ef16SKarl Rupp 
230e4a0ef16SKarl Rupp   PetscFunctionBegin;
23167c87b7fSKarl Rupp   if (A->rmap->n > 0 && A->cmap->n > 0) {
232e4a0ef16SKarl Rupp     try {
233e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
234e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
235e4a0ef16SKarl Rupp       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
236e4a0ef16SKarl Rupp 
237e4a0ef16SKarl Rupp       if (a->compressedrow.use) {
238a3430c56SKarl Rupp         ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
239e4a0ef16SKarl Rupp         *zgpu = *ygpu + temp;
2404cf1874eSKarl Rupp         ViennaCLWaitForGPU();
241e4a0ef16SKarl Rupp       } else {
242a3430c56SKarl Rupp         if (zz == xx || zz == yy) { //temporary required
243a3430c56SKarl Rupp           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
244a3430c56SKarl Rupp           *zgpu = *ygpu;
245a3430c56SKarl Rupp           *zgpu += temp;
246a3430c56SKarl Rupp           ViennaCLWaitForGPU();
247a3430c56SKarl Rupp         } else {
248a3430c56SKarl Rupp           *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
249a3430c56SKarl Rupp           *zgpu = *ygpu + *viennaclstruct->tempvec;
2504cf1874eSKarl Rupp           ViennaCLWaitForGPU();
251e4a0ef16SKarl Rupp         }
252e4a0ef16SKarl Rupp       }
253e4a0ef16SKarl Rupp 
254e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
255e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
256e4a0ef16SKarl Rupp       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
257e4a0ef16SKarl Rupp 
2584076e183SKarl Rupp     } catch(std::exception const & ex) {
2594076e183SKarl Rupp       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
260e4a0ef16SKarl Rupp     }
261e4a0ef16SKarl Rupp     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
26267c87b7fSKarl Rupp   }
263e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
264e4a0ef16SKarl Rupp }
265e4a0ef16SKarl Rupp 
266e4a0ef16SKarl Rupp PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
267e4a0ef16SKarl Rupp {
268e4a0ef16SKarl Rupp   PetscErrorCode ierr;
269e4a0ef16SKarl Rupp 
270e4a0ef16SKarl Rupp   PetscFunctionBegin;
271e4a0ef16SKarl Rupp   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
272e4a0ef16SKarl Rupp   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
273e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
274e4a0ef16SKarl Rupp }
275e4a0ef16SKarl Rupp 
276e4a0ef16SKarl Rupp /* --------------------------------------------------------------------------------*/
277e4a0ef16SKarl Rupp /*@
278e4a0ef16SKarl Rupp    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
27919fddfadSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately be pushed down
280e4a0ef16SKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
281e4a0ef16SKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
282e4a0ef16SKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
283e4a0ef16SKarl Rupp    performance during matrix assembly can be increased substantially.
284e4a0ef16SKarl Rupp 
285e4a0ef16SKarl Rupp 
286e4a0ef16SKarl Rupp    Collective on MPI_Comm
287e4a0ef16SKarl Rupp 
288e4a0ef16SKarl Rupp    Input Parameters:
289e4a0ef16SKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
290e4a0ef16SKarl Rupp .  m - number of rows
291e4a0ef16SKarl Rupp .  n - number of columns
292e4a0ef16SKarl Rupp .  nz - number of nonzeros per row (same for all rows)
293e4a0ef16SKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
294e4a0ef16SKarl Rupp          (possibly different for each row) or NULL
295e4a0ef16SKarl Rupp 
296e4a0ef16SKarl Rupp    Output Parameter:
297e4a0ef16SKarl Rupp .  A - the matrix
298e4a0ef16SKarl Rupp 
299e4a0ef16SKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
300e4a0ef16SKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
301e4a0ef16SKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
302e4a0ef16SKarl Rupp 
303e4a0ef16SKarl Rupp    Notes:
304e4a0ef16SKarl Rupp    If nnz is given then nz is ignored
305e4a0ef16SKarl Rupp 
306e4a0ef16SKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
307e4a0ef16SKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
308e4a0ef16SKarl Rupp    storage.  That is, the stored row and column indices can begin at
309e4a0ef16SKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
310e4a0ef16SKarl Rupp 
311e4a0ef16SKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
312e4a0ef16SKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
313e4a0ef16SKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
314e4a0ef16SKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
315e4a0ef16SKarl Rupp 
316e4a0ef16SKarl Rupp    Level: intermediate
317e4a0ef16SKarl Rupp 
318e9e886b6SKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
319e4a0ef16SKarl Rupp 
320e4a0ef16SKarl Rupp @*/
321e4a0ef16SKarl Rupp PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
322e4a0ef16SKarl Rupp {
323e4a0ef16SKarl Rupp   PetscErrorCode ierr;
324e4a0ef16SKarl Rupp 
325e4a0ef16SKarl Rupp   PetscFunctionBegin;
326e4a0ef16SKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
327e4a0ef16SKarl Rupp   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
328e4a0ef16SKarl Rupp   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
329e4a0ef16SKarl Rupp   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
330e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
331e4a0ef16SKarl Rupp }
332e4a0ef16SKarl Rupp 
333e4a0ef16SKarl Rupp 
334e4a0ef16SKarl Rupp PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
335e4a0ef16SKarl Rupp {
336e4a0ef16SKarl Rupp   PetscErrorCode ierr;
337e4a0ef16SKarl Rupp   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
338e4a0ef16SKarl Rupp 
339e4a0ef16SKarl Rupp   PetscFunctionBegin;
340e4a0ef16SKarl Rupp   try {
3416447cd05SKarl Rupp     if (viennaclcontainer) {
3426447cd05SKarl Rupp       delete viennaclcontainer->tempvec;
3436447cd05SKarl Rupp       delete viennaclcontainer->mat;
3446447cd05SKarl Rupp       delete viennaclcontainer->compressed_mat;
345e4a0ef16SKarl Rupp       delete viennaclcontainer;
3466447cd05SKarl Rupp     }
347*b8ced49eSKarl Rupp     A->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
3484076e183SKarl Rupp   } catch(std::exception const & ex) {
3494076e183SKarl Rupp     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
350e4a0ef16SKarl Rupp   }
3518713a8baSPatrick Sanan 
3528713a8baSPatrick Sanan   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
3538713a8baSPatrick Sanan 
354e4a0ef16SKarl Rupp   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
355e4a0ef16SKarl Rupp   A->spptr = 0;
356e4a0ef16SKarl Rupp   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
357e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
358e4a0ef16SKarl Rupp }
359e4a0ef16SKarl Rupp 
360e4a0ef16SKarl Rupp 
361e4a0ef16SKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
362e4a0ef16SKarl Rupp {
363e4a0ef16SKarl Rupp   PetscErrorCode ierr;
364e4a0ef16SKarl Rupp 
365e4a0ef16SKarl Rupp   PetscFunctionBegin;
366e4a0ef16SKarl Rupp   ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
3678713a8baSPatrick Sanan   ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);
3688713a8baSPatrick Sanan   PetscFunctionReturn(0);
3698713a8baSPatrick Sanan }
3708713a8baSPatrick Sanan 
371c3cca76eSKarl Rupp static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
372c3cca76eSKarl Rupp {
373c3cca76eSKarl Rupp   PetscErrorCode ierr;
374c3cca76eSKarl Rupp   Mat C;
375c3cca76eSKarl Rupp 
376c3cca76eSKarl Rupp   PetscFunctionBegin;
377c3cca76eSKarl Rupp   ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
378c3cca76eSKarl Rupp   C = *B;
379c3cca76eSKarl Rupp 
380c3cca76eSKarl Rupp   C->ops->mult        = MatMult_SeqAIJViennaCL;
381c3cca76eSKarl Rupp   C->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
382c3cca76eSKarl Rupp   C->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
383c3cca76eSKarl Rupp   C->ops->destroy     = MatDestroy_SeqAIJViennaCL;
384c3cca76eSKarl Rupp   C->ops->getvecs     = MatCreateVecs_SeqAIJViennaCL;
385c3cca76eSKarl Rupp   C->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
386c3cca76eSKarl Rupp 
387c3cca76eSKarl Rupp   C->spptr        = new Mat_SeqAIJViennaCL();
388c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
389c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
390c3cca76eSKarl Rupp   ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;
391c3cca76eSKarl Rupp 
392c3cca76eSKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);
393c3cca76eSKarl Rupp 
394*b8ced49eSKarl Rupp   C->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
395c3cca76eSKarl Rupp 
396c3cca76eSKarl Rupp   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
397c3cca76eSKarl Rupp   if (C->assembled) {
398c3cca76eSKarl Rupp     ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
399c3cca76eSKarl Rupp   }
400c3cca76eSKarl Rupp 
401c3cca76eSKarl Rupp   PetscFunctionReturn(0);
402c3cca76eSKarl Rupp }
403c3cca76eSKarl Rupp 
4048713a8baSPatrick Sanan PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
4058713a8baSPatrick Sanan {
4068713a8baSPatrick Sanan   PetscErrorCode ierr;
4078713a8baSPatrick Sanan   Mat            B;
4088713a8baSPatrick Sanan   Mat_SeqAIJ     *aij;
4098713a8baSPatrick Sanan 
4108713a8baSPatrick Sanan   PetscFunctionBegin;
4118713a8baSPatrick Sanan 
4128713a8baSPatrick Sanan   if (reuse == MAT_REUSE_MATRIX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");
4138713a8baSPatrick Sanan 
4148713a8baSPatrick Sanan   if (reuse == MAT_INITIAL_MATRIX) {
4158713a8baSPatrick Sanan     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
4168713a8baSPatrick Sanan   }
4178713a8baSPatrick Sanan 
4188713a8baSPatrick Sanan   B = *newmat;
4198713a8baSPatrick Sanan 
420e4a0ef16SKarl Rupp   aij             = (Mat_SeqAIJ*)B->data;
421e4a0ef16SKarl Rupp   aij->inode.use  = PETSC_FALSE;
4228713a8baSPatrick Sanan 
423e4a0ef16SKarl Rupp   B->ops->mult    = MatMult_SeqAIJViennaCL;
424e4a0ef16SKarl Rupp   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
425e4a0ef16SKarl Rupp   B->spptr        = new Mat_SeqAIJViennaCL();
426e4a0ef16SKarl Rupp 
427a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
428a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
429a3430c56SKarl Rupp   ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;
430e4a0ef16SKarl Rupp 
431e4a0ef16SKarl Rupp   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
432e4a0ef16SKarl Rupp   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
4332a7a6963SBarry Smith   B->ops->getvecs        = MatCreateVecs_SeqAIJViennaCL;
434c3cca76eSKarl Rupp   B->ops->duplicate      = MatDuplicate_SeqAIJViennaCL;
435e4a0ef16SKarl Rupp 
436e4a0ef16SKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
437e4a0ef16SKarl Rupp 
4388713a8baSPatrick Sanan   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
4398713a8baSPatrick Sanan 
440*b8ced49eSKarl Rupp   B->valid_GPU_matrix = PETSC_OFFLOAD_UNALLOCATED;
4418713a8baSPatrick Sanan 
4428713a8baSPatrick Sanan   /* If the source matrix is already assembled, copy the destination matrix to the GPU */
4438713a8baSPatrick Sanan   if (B->assembled) {
4448713a8baSPatrick Sanan     ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
4458713a8baSPatrick Sanan   }
4468713a8baSPatrick Sanan 
447e4a0ef16SKarl Rupp   PetscFunctionReturn(0);
448e4a0ef16SKarl Rupp }
449e4a0ef16SKarl Rupp 
450e4a0ef16SKarl Rupp 
4513ca39a21SBarry Smith /*MC
452e4a0ef16SKarl Rupp    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
453e4a0ef16SKarl Rupp 
454e4a0ef16SKarl Rupp    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
455e4a0ef16SKarl Rupp    default. All matrix calculations are performed using the ViennaCL library.
456e4a0ef16SKarl Rupp 
457e4a0ef16SKarl Rupp    Options Database Keys:
458e4a0ef16SKarl Rupp +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
459e4a0ef16SKarl Rupp .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
460e4a0ef16SKarl Rupp -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
461e4a0ef16SKarl Rupp 
462e4a0ef16SKarl Rupp   Level: beginner
463e4a0ef16SKarl Rupp 
464e4a0ef16SKarl Rupp .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
465e4a0ef16SKarl Rupp M*/
466e4a0ef16SKarl Rupp 
46772367587SKarl Rupp 
4683ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
46972367587SKarl Rupp {
47072367587SKarl Rupp   PetscErrorCode ierr;
47172367587SKarl Rupp 
47272367587SKarl Rupp   PetscFunctionBegin;
4733ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4743ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4753ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
4763ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL,    MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
47772367587SKarl Rupp   PetscFunctionReturn(0);
47872367587SKarl Rupp }
479