xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision a84739b8579801b46f6cd1989a1193baab1d2400)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   /* "Handle" used by SpMV2 inspector-executor routines. */
17   sparse_matrix_t csrA;
18 } Mat_SeqAIJMKL;
19 
20 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
25 {
26   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
27   /* so we will ignore 'MatType type'. */
28   PetscErrorCode ierr;
29   Mat            B       = *newmat;
30   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
31 
32   PetscFunctionBegin;
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
35   }
36 
37   /* Reset the original function pointers. */
38   B->ops->duplicate        = MatDuplicate_SeqAIJ;
39   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
40   B->ops->destroy          = MatDestroy_SeqAIJ;
41   B->ops->mult             = MatMult_SeqAIJ;
42   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
43   B->ops->multadd          = MatMultAdd_SeqAIJ;
44   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
45 
46   /* Free everything in the Mat_SeqAIJMKL data structure.
47    * We don't free the Mat_SeqAIJMKL struct itself, as this will
48    * cause problems later when MatDestroy() tries to free it. */
49   /* Actually there is nothing to do here right now.
50    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
51    * see if there is some way to clean up the "handle" used by SpMV2. */
52 
53   /* Change the type of B to MATSEQAIJ. */
54   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
55 
56   *newmat = B;
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
62 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
63 {
64   PetscErrorCode ierr;
65   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
66 
67   PetscFunctionBegin;
68   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
69   mkl_sparse_destroy(aijmkl->csrA);
70   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
71 
72   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
73    * to destroy everything that remains. */
74   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
75   /* Note that I don't call MatSetType().  I believe this is because that
76    * is only to be called when *building* a matrix.  I could be wrong, but
77    * that is how things work for the SuperLU matrix class. */
78   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
84 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
85 {
86   PetscErrorCode ierr;
87   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
88   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
89 
90   PetscFunctionBegin;
91   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
92   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
98 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
99 {
100   PetscErrorCode ierr;
101   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
102 
103   PetscFunctionBegin;
104   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
105 
106   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
107    * extra information and some different methods, call the AssemblyEnd
108    * routine for a MATSEQAIJ.
109    * I'm not sure if this is the best way to do this, but it avoids
110    * a lot of code duplication.
111    * I also note that currently MATSEQAIJMKL doesn't know anything about
112    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
113    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
114    * this, this may break things.  (Don't know... haven't looked at it.
115    * Do I need to disable this somehow?) */
116   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
117   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
118 
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatMult_SeqAIJMKL"
124 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
125 {
126   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
127   const PetscScalar *x;
128   PetscScalar       *y;
129   const MatScalar   *aa;
130   PetscErrorCode    ierr;
131   PetscInt          m=A->rmap->n;
132   const PetscInt    *aj,*ai;
133   PetscInt          i;
134 
135   /* Variables not in MatMult_SeqAIJ. */
136   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
137 
138   PetscFunctionBegin;
139   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
141   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
142   aa   = a->a;  /* Nonzero elements stored row-by-row. */
143   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
144 
145   /* Call MKL sparse BLAS routine to do the MatMult. */
146   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
147 
148   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
150   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
151   PetscFunctionReturn(0);
152 }
153 
154 #undef __FUNCT__
155 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
156 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
157 {
158   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
159   const PetscScalar *x;
160   PetscScalar       *y;
161   const MatScalar   *aa;
162   PetscErrorCode    ierr;
163   PetscInt          m=A->rmap->n;
164   const PetscInt    *aj,*ai;
165   PetscInt          i;
166 
167   /* Variables not in MatMultTranspose_SeqAIJ. */
168   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
169 
170   PetscFunctionBegin;
171   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
172   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
173   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
174   aa   = a->a;  /* Nonzero elements stored row-by-row. */
175   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
176 
177   /* Call MKL sparse BLAS routine to do the MatMult. */
178   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
179 
180   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
181   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
182   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
188 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
189 {
190   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
191   const PetscScalar *x;
192   PetscScalar       *y,*z;
193   const MatScalar   *aa;
194   PetscErrorCode    ierr;
195   PetscInt          m=A->rmap->n;
196   const PetscInt    *aj,*ai;
197   PetscInt          i;
198 
199   /* Variables not in MatMultAdd_SeqAIJ. */
200   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
201   PetscScalar       alpha = 1.0;
202   PetscScalar       beta = 1.0;
203   char              matdescra[6];
204 
205   PetscFunctionBegin;
206   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
207   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
208 
209   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
210   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
211   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
212   aa   = a->a;  /* Nonzero elements stored row-by-row. */
213   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
214 
215   /* Call MKL sparse BLAS routine to do the MatMult. */
216   if (zz == yy) {
217     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
218     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
219   } else {
220     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
221      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
222     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
223     for (i=0; i<m; i++) {
224       z[i] += y[i];
225     }
226   }
227 
228   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
229   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
230   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
236 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
237 {
238   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
239   const PetscScalar *x;
240   PetscScalar       *y,*z;
241   const MatScalar   *aa;
242   PetscErrorCode    ierr;
243   PetscInt          m=A->rmap->n;
244   const PetscInt    *aj,*ai;
245   PetscInt          i;
246 
247   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
248   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
249   PetscScalar       alpha = 1.0;
250   PetscScalar       beta = 1.0;
251   char              matdescra[6];
252 
253   PetscFunctionBegin;
254   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
255   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
256 
257   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
258   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
259   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
260   aa   = a->a;  /* Nonzero elements stored row-by-row. */
261   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
262 
263   /* Call MKL sparse BLAS routine to do the MatMult. */
264   if (zz == yy) {
265     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
266     mkl_xcsrmv(&transa,&m,&m,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
267   } else {
268     /* zz and yy are different vectors, so we call mkl_cspblas_xcsrgemv(), which calculates y = A*x, and then
269      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
270     mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
271     for (i=0; i<m; i++) {
272       z[i] += y[i];
273     }
274   }
275 
276   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
277   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
278   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
283  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
284  * routine, but can also be used to convert an assembled SeqAIJ matrix
285  * into a SeqAIJMKL one. */
286 #undef __FUNCT__
287 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
288 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
289 {
290   PetscErrorCode ierr;
291   Mat            B = *newmat;
292   Mat_SeqAIJMKL *aijmkl;
293 
294   PetscFunctionBegin;
295   if (reuse == MAT_INITIAL_MATRIX) {
296     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
297   }
298 
299   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
300   B->spptr = (void*) aijmkl;
301 
302   /* Set function pointers for methods that we inherit from AIJ but override. */
303   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
304   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
305   B->ops->destroy          = MatDestroy_SeqAIJMKL;
306   B->ops->mult             = MatMult_SeqAIJMKL;
307   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
308   B->ops->multadd          = MatMultAdd_SeqAIJMKL;
309   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
310 
311   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
312 
313   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
314   *newmat = B;
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatCreateSeqAIJMKL"
320 /*@C
321    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
322    This type inherits from AIJ and is largely identical, but uses sparse BLAS
323    routines from Intel MKL whenever possible.
324    Collective on MPI_Comm
325 
326    Input Parameters:
327 +  comm - MPI communicator, set to PETSC_COMM_SELF
328 .  m - number of rows
329 .  n - number of columns
330 .  nz - number of nonzeros per row (same for all rows)
331 -  nnz - array containing the number of nonzeros in the various rows
332          (possibly different for each row) or NULL
333 
334    Output Parameter:
335 .  A - the matrix
336 
337    Notes:
338    If nnz is given then nz is ignored
339 
340    Level: intermediate
341 
342 .keywords: matrix, cray, sparse, parallel
343 
344 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
345 @*/
346 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
347 {
348   PetscErrorCode ierr;
349 
350   PetscFunctionBegin;
351   ierr = MatCreate(comm,A);CHKERRQ(ierr);
352   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
353   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
354   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreate_SeqAIJMKL"
360 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
361 {
362   PetscErrorCode ierr;
363 
364   PetscFunctionBegin;
365   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
366   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 
371