xref: /petsc/src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx (revision a76eec0e025e246efca9d3ebd46f014f5abffee5)
1 
2 
3 /*
4     Defines the basic matrix operations for the AIJ (compressed row)
5   matrix storage format.
6 */
7 
8 #include "petscconf.h"
9 #include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
10 #include "petscbt.h"
11 #include "../src/vec/vec/impls/dvecimpl.h"
12 #include "petsc-private/vecimpl.h"
13 
14 #include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
15 
16 
17 #include <algorithm>
18 #include <vector>
19 #include <string>
20 
21 #include "viennacl/linalg/prod.hpp"
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatViennaCLCopyToGPU"
25 PetscErrorCode MatViennaCLCopyToGPU(Mat A)
26 {
27 
28   Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
29   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
30   PetscInt           *ii;
31   PetscErrorCode     ierr;
32 
33 
34   PetscFunctionBegin;
35   if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0
36     if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
37       ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
38 
39       try {
40         ierr = PetscObjectSetFromOptions_ViennaCL((PetscObject)A);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */
41         viennaclstruct->mat = new ViennaCLAIJMatrix();
42         if (a->compressedrow.use) {
43           ii = a->compressedrow.i;
44 
45           viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
46 
47           // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
48           //       Cannot be reasonably supported in ViennaCL 1.4.x (custom kernels required), hence postponing until there is support for compressed CSR
49 
50           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
51         } else {
52 
53           // Since PetscInt is in general different from cl_uint, we have to convert:
54           viennacl::backend::mem_handle dummy;
55 
56           viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
57           for (PetscInt i=0; i<=A->rmap->n; ++i)
58             row_buffer.set(i, (a->i)[i]);
59 
60           viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
61           for (PetscInt i=0; i<a->nz; ++i)
62             col_buffer.set(i, (a->j)[i]);
63 
64           viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
65         }
66         ViennaCLWaitForGPU();
67       } catch(std::exception const & ex) {
68         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
69       }
70 
71       A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
72 
73       ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
74     }
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatViennaCLCopyFromGPU"
81 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
82 {
83   Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
84   PetscInt           m               = A->rmap->n;
85   PetscErrorCode     ierr;
86 
87 
88   PetscFunctionBegin;
89   if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
90     try {
91       if (a->compressedrow.use) {
92         SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
93       } else {
94 
95         if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
96         a->nz           = Agpu->nnz();
97         a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
98         A->preallocated = PETSC_TRUE;
99         if (a->singlemalloc) {
100           if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
101         } else {
102           if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
103           if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
104           if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
105         }
106         ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
107         ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
108 
109         a->singlemalloc = PETSC_TRUE;
110 
111         /* Setup row lengths */
112         if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
113         ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
114         ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
115 
116         /* Copy data back from GPU */
117         viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
118 
119         // copy row array
120         viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
121         (a->i)[0] = row_buffer[0];
122         for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
123           (a->i)[i+1] = row_buffer[i+1];
124           a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
125         }
126 
127         // copy column indices
128         viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
129         viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
130         for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
131           (a->j)[i] = col_buffer[i];
132 
133         // copy nonzero entries directly to destination (no conversion required)
134         viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
135 
136         ViennaCLWaitForGPU();
137         /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
138       }
139     } catch(std::exception const & ex) {
140       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
141     }
142 
143     /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
144     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
145     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
146 
147     A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
148   } else {
149     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
150   }
151   PetscFunctionReturn(0);
152 }
153 
154 #undef __FUNCT__
155 #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
156 PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
157 {
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   if (right) {
162     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
163     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
164     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
165     ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
166     ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
167   }
168   if (left) {
169     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
170     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
171     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
172     ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
173     ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
174   }
175   PetscFunctionReturn(0);
176 }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatMult_SeqAIJViennaCL"
180 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
181 {
182   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
183   PetscErrorCode       ierr;
184   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
185   const ViennaCLVector *xgpu=NULL;
186   ViennaCLVector       *ygpu=NULL;
187 
188   PetscFunctionBegin;
189   if (A->rmap->n > 0 && A->cmap->n > 0) {
190     ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
191     ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
192     try {
193       *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
194       ViennaCLWaitForGPU();
195     } catch (std::exception const & ex) {
196       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
197     }
198     ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
199     ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
200     ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
209 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
210 {
211   Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
212   PetscErrorCode       ierr;
213   Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
214   const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
215   ViennaCLVector       *zgpu=NULL;
216 
217   PetscFunctionBegin;
218   if (A->rmap->n > 0 && A->cmap->n > 0) {
219     try {
220       ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
221       ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
222       ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
223 
224       if (a->compressedrow.use) {
225           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
226       } else {
227         if (zz == xx || zz == yy) { //temporary required
228           ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
229           *zgpu = *ygpu + temp;
230           ViennaCLWaitForGPU();
231         } else {
232           *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
233           *zgpu += *ygpu;
234           ViennaCLWaitForGPU();
235         }
236       }
237 
238       ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
239       ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
240       ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
241 
242     } catch(std::exception const & ex) {
243       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
244     }
245     ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
246   }
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
252 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
253 {
254   PetscErrorCode ierr;
255 
256   PetscFunctionBegin;
257   ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
258   ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
259   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
260   A->ops->mult    = MatMult_SeqAIJViennaCL;
261   A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
262   PetscFunctionReturn(0);
263 }
264 
265 /* --------------------------------------------------------------------------------*/
266 #undef __FUNCT__
267 #define __FUNCT__ "MatCreateSeqAIJViennaCL"
268 /*@
269    MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
270    (the default parallel PETSc format).  This matrix will ultimately be pushed down
271    to GPUs and use the ViennaCL library for calculations. For good matrix
272    assembly performance the user should preallocate the matrix storage by setting
273    the parameter nz (or the array nnz).  By setting these parameters accurately,
274    performance during matrix assembly can be increased substantially.
275 
276 
277    Collective on MPI_Comm
278 
279    Input Parameters:
280 +  comm - MPI communicator, set to PETSC_COMM_SELF
281 .  m - number of rows
282 .  n - number of columns
283 .  nz - number of nonzeros per row (same for all rows)
284 -  nnz - array containing the number of nonzeros in the various rows
285          (possibly different for each row) or NULL
286 
287    Output Parameter:
288 .  A - the matrix
289 
290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
291    MatXXXXSetPreallocation() paradigm instead of this routine directly.
292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
293 
294    Notes:
295    If nnz is given then nz is ignored
296 
297    The AIJ format (also called the Yale sparse matrix format or
298    compressed row storage), is fully compatible with standard Fortran 77
299    storage.  That is, the stored row and column indices can begin at
300    either one (as in Fortran) or zero.  See the users' manual for details.
301 
302    Specify the preallocated storage with either nz or nnz (not both).
303    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
304    allocation.  For large problems you MUST preallocate memory or you
305    will get TERRIBLE performance, see the users' manual chapter on matrices.
306 
307    Level: intermediate
308 
309 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
310 
311 @*/
312 PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
313 {
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   ierr = MatCreate(comm,A);CHKERRQ(ierr);
318   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
319   ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
320   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
327 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
328 {
329   PetscErrorCode ierr;
330   Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
331 
332   PetscFunctionBegin;
333   try {
334     if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
335     delete viennaclcontainer;
336     A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
337   } catch(std::exception const & ex) {
338     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
339   }
340   /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
341   A->spptr = 0;
342   ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 
347 #undef __FUNCT__
348 #define __FUNCT__ "MatCreate_SeqAIJViennaCL"
349 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
350 {
351   PetscErrorCode ierr;
352   Mat_SeqAIJ     *aij;
353 
354   PetscFunctionBegin;
355   ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
356   aij             = (Mat_SeqAIJ*)B->data;
357   aij->inode.use  = PETSC_FALSE;
358   B->ops->mult    = MatMult_SeqAIJViennaCL;
359   B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
360   B->spptr        = new Mat_SeqAIJViennaCL();
361 
362   ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0;
363 
364   B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
365   B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
366   B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
367 
368   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
369 
370   B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
371   PetscFunctionReturn(0);
372 }
373 
374 
375 /*M
376    MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
377 
378    A matrix type type whose data resides on GPUs. These matrices are in CSR format by
379    default. All matrix calculations are performed using the ViennaCL library.
380 
381    Options Database Keys:
382 +  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
383 .  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
384 -  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
385 
386   Level: beginner
387 
388 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
389 M*/
390 
391