xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision ff03dc5341f8e08c034bb25cd05c5e9387d3fbf2)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   /* "Handle" used by SpMV2 inspector-executor routines. */
17   sparse_matrix_t csrA;
18 } Mat_SeqAIJMKL;
19 
20 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatConvert_SeqAIJMKL_SeqAIJ"
24 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
25 {
26   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
27   /* so we will ignore 'MatType type'. */
28   PetscErrorCode ierr;
29   Mat            B       = *newmat;
30   Mat_SeqAIJMKL *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
31 
32   PetscFunctionBegin;
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
35   }
36 
37   /* Reset the original function pointers. */
38   B->ops->duplicate        = MatDuplicate_SeqAIJ;
39   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
40   B->ops->destroy          = MatDestroy_SeqAIJ;
41   B->ops->mult             = MatMult_SeqAIJ;
42   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
43   B->ops->multadd          = MatMultAdd_SeqAIJ;
44   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
45 
46   /* Free everything in the Mat_SeqAIJMKL data structure.
47    * We don't free the Mat_SeqAIJMKL struct itself, as this will
48    * cause problems later when MatDestroy() tries to free it. */
49   /* Actually there is nothing to do here right now.
50    * When I've added use of the MKL SpMV2 inspector-executor routines, I should
51    * see if there is some way to clean up the "handle" used by SpMV2. */
52 
53   /* Change the type of B to MATSEQAIJ. */
54   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
55 
56   *newmat = B;
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatDestroy_SeqAIJMKL"
62 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
63 {
64   PetscErrorCode ierr;
65   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
66 
67   PetscFunctionBegin;
68   /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
69   mkl_sparse_destroy(aijmkl->csrA);
70   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
71 
72   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
73    * to destroy everything that remains. */
74   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
75   /* Note that I don't call MatSetType().  I believe this is because that
76    * is only to be called when *building* a matrix.  I could be wrong, but
77    * that is how things work for the SuperLU matrix class. */
78   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatDuplicate_SeqAIJMKL"
84 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
85 {
86   PetscErrorCode ierr;
87   Mat_SeqAIJMKL *aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
88   Mat_SeqAIJMKL *aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
89 
90   PetscFunctionBegin;
91   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
92   ierr = PetscMemcpy((*M)->spptr,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "MatAssemblyEnd_SeqAIJMKL"
98 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
99 {
100   PetscErrorCode ierr;
101   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
102 
103   PetscFunctionBegin;
104   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
105 
106   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
107    * extra information and some different methods, call the AssemblyEnd
108    * routine for a MATSEQAIJ.
109    * I'm not sure if this is the best way to do this, but it avoids
110    * a lot of code duplication.
111    * I also note that currently MATSEQAIJMKL doesn't know anything about
112    * the Mat_CompressedRow data structure that SeqAIJ now uses when there
113    * are many zero rows.  If the SeqAIJ assembly end routine decides to use
114    * this, this may break things.  (Don't know... haven't looked at it.
115    * Do I need to disable this somehow?) */
116   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
117   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
118 
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatMult_SeqAIJMKL"
124 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
125 {
126   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
127   const PetscScalar *x;
128   PetscScalar       *y;
129   const MatScalar   *aa;
130   PetscErrorCode    ierr;
131   PetscInt          m=A->rmap->n;
132   const PetscInt    *aj,*ai;
133   PetscInt          i;
134 
135   /* Variables not in MatMult_SeqAIJ. */
136   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
137 
138   PetscFunctionBegin;
139   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
141   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
142   aa   = a->a;  /* Nonzero elements stored row-by-row. */
143   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
144 
145   /* Call MKL sparse BLAS routine to do the MatMult. */
146   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
147 
148   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
150   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
151   PetscFunctionReturn(0);
152 }
153 
154 #undef __FUNCT__
155 #define __FUNCT__ "MatMultTranspose_SeqAIJMKL"
156 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
157 {
158   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
159   const PetscScalar *x;
160   PetscScalar       *y;
161   const MatScalar   *aa;
162   PetscErrorCode    ierr;
163   PetscInt          m=A->rmap->n;
164   const PetscInt    *aj,*ai;
165   PetscInt          i;
166 
167   /* Variables not in MatMultTranspose_SeqAIJ. */
168   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
169 
170   PetscFunctionBegin;
171   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
172   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
173   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
174   aa   = a->a;  /* Nonzero elements stored row-by-row. */
175   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
176 
177   /* Call MKL sparse BLAS routine to do the MatMult. */
178   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,y);
179 
180   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
181   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
182   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMultAdd_SeqAIJMKL"
188 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
189 {
190   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
191   const PetscScalar *x;
192   PetscScalar       *y,*z;
193   const MatScalar   *aa;
194   PetscErrorCode    ierr;
195   PetscInt          m=A->rmap->n;
196   const PetscInt    *aj,*ai;
197   PetscInt          i;
198 
199   /* Variables not in MatMultAdd_SeqAIJ. */
200   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
201 
202   PetscFunctionBegin;
203   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
204   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
205   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
206   aa   = a->a;  /* Nonzero elements stored row-by-row. */
207   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
208 
209   /* Call MKL sparse BLAS routine to do the MatMult. */
210   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
211 
212   /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */
213   for (i=0; i<m; i++) {
214     z[i] += y[i];
215   }
216 
217   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
218   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
219   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 #undef __FUNCT__
224 #define __FUNCT__ "MatMultTransposeAdd_SeqAIJMKL"
225 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
226 {
227   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
228   const PetscScalar *x;
229   PetscScalar       *y,*z;
230   const MatScalar   *aa;
231   PetscErrorCode    ierr;
232   PetscInt          m=A->rmap->n;
233   const PetscInt    *aj,*ai;
234   PetscInt          i;
235 
236   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
237   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
238 
239   PetscFunctionBegin;
240   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
241   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
242   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
243   aa   = a->a;  /* Nonzero elements stored row-by-row. */
244   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
245 
246   /* Call MKL sparse BLAS routine to do the MatMult. */
247   mkl_cspblas_xcsrgemv(&transa,&m,aa,ai,aj,x,z);
248 
249   /* Now add the vector y; MKL sparse BLAS does not have a MatMultAdd equivalent. */
250   for (i=0; i<m; i++) {
251     z[i] += y[i];
252   }
253 
254   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
255   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
256   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
257   PetscFunctionReturn(0);
258 }
259 
260 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
261  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
262  * routine, but can also be used to convert an assembled SeqAIJ matrix
263  * into a SeqAIJMKL one. */
264 #undef __FUNCT__
265 #define __FUNCT__ "MatConvert_SeqAIJ_SeqAIJMKL"
266 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
267 {
268   PetscErrorCode ierr;
269   Mat            B = *newmat;
270   Mat_SeqAIJMKL *aijmkl;
271 
272   PetscFunctionBegin;
273   if (reuse == MAT_INITIAL_MATRIX) {
274     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
275   }
276 
277   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
278   B->spptr = (void*) aijmkl;
279 
280   /* Set function pointers for methods that we inherit from AIJ but override. */
281   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
282   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
283   B->ops->destroy          = MatDestroy_SeqAIJMKL;
284   B->ops->mult             = MatMult_SeqAIJMKL;
285   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
286   B->ops->multadd          = MatMultAdd_SeqAIJMKL;
287   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
288 
289   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
290 
291   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
292   *newmat = B;
293   PetscFunctionReturn(0);
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "MatCreateSeqAIJMKL"
298 /*@C
299    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
300    This type inherits from AIJ and is largely identical, but uses sparse BLAS
301    routines from Intel MKL whenever possible.
302    Collective on MPI_Comm
303 
304    Input Parameters:
305 +  comm - MPI communicator, set to PETSC_COMM_SELF
306 .  m - number of rows
307 .  n - number of columns
308 .  nz - number of nonzeros per row (same for all rows)
309 -  nnz - array containing the number of nonzeros in the various rows
310          (possibly different for each row) or NULL
311 
312    Output Parameter:
313 .  A - the matrix
314 
315    Notes:
316    If nnz is given then nz is ignored
317 
318    Level: intermediate
319 
320 .keywords: matrix, cray, sparse, parallel
321 
322 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
323 @*/
324 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
325 {
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329   ierr = MatCreate(comm,A);CHKERRQ(ierr);
330   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
331   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
332   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatCreate_SeqAIJMKL"
338 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
339 {
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
344   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 
349