xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision bb42e86ab3e4720245217a7cdcd02146569f06da)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
19   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
20   struct matrix_descr descr;
21 #endif
22 } Mat_SeqAIJMKL;
23 
24 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
25 
26 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
27 {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   PetscErrorCode ierr;
31   Mat            B       = *newmat;
32 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
33   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
34 #endif
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) {
38     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
39   }
40 
41   /* Reset the original function pointers. */
42   B->ops->duplicate        = MatDuplicate_SeqAIJ;
43   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
44   B->ops->destroy          = MatDestroy_SeqAIJ;
45   B->ops->mult             = MatMult_SeqAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
47   B->ops->multadd          = MatMultAdd_SeqAIJ;
48   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
49   B->ops->scale            = MatScale_SeqAIJ;
50   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
51   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
52   B->ops->axpy             = MatAXPY_SeqAIJ;
53 
54   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
55   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
56   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58 
59   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
60    * simply involves destroying the MKL sparse matrix handle and then freeing
61    * the spptr pointer. */
62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
63   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
64 
65   if (aijmkl->sparse_optimized) {
66     sparse_status_t stat;
67     stat = mkl_sparse_destroy(aijmkl->csrA);
68     if (stat != SPARSE_STATUS_SUCCESS) {
69       PetscFunctionReturn(PETSC_ERR_LIB);
70     }
71   }
72 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
73   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
74 
75   /* Change the type of B to MATSEQAIJ. */
76   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
77 
78   *newmat = B;
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
83 {
84   PetscErrorCode ierr;
85   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
86 
87   PetscFunctionBegin;
88 
89   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
90    * spptr pointer. */
91   if (aijmkl) {
92     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
93 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
94     if (aijmkl->sparse_optimized) {
95       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
96       stat = mkl_sparse_destroy(aijmkl->csrA);
97       if (stat != SPARSE_STATUS_SUCCESS) {
98         PetscFunctionReturn(PETSC_ERR_LIB);
99       }
100     }
101 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
102     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
103   }
104 
105   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
106    * to destroy everything that remains. */
107   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
108   /* Note that I don't call MatSetType().  I believe this is because that
109    * is only to be called when *building* a matrix.  I could be wrong, but
110    * that is how things work for the SuperLU matrix class. */
111   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
116 {
117 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
118   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
119    * does nothing. We make it callable anyway in this case because it cuts
120    * down on littering the code with #ifdefs. */
121   PetscFunctionReturn(0);
122 #else
123   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
124   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
125   PetscInt        m,n;
126   MatScalar       *aa;
127   PetscInt        *aj,*ai;
128   sparse_status_t stat;
129 
130   PetscFunctionBegin;
131   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
132 
133   if (aijmkl->sparse_optimized) {
134     /* Matrix has been previously assembled and optimized. Must destroy old
135      * matrix handle before running the optimization step again. */
136     stat = mkl_sparse_destroy(aijmkl->csrA);
137     if (stat != SPARSE_STATUS_SUCCESS) {
138       PetscFunctionReturn(PETSC_ERR_LIB);
139     }
140   }
141   aijmkl->sparse_optimized = PETSC_FALSE;
142 
143   /* Now perform the SpMV2 setup and matrix optimization. */
144   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
145   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
146   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
147   m = A->rmap->n;
148   n = A->cmap->n;
149   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
150   aa   = a->a;  /* Nonzero elements stored row-by-row. */
151   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
152   if ((a->nz!=0) & !(A->structure_only)) {
153     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
154      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
155     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
156     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
157     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
158     stat = mkl_sparse_optimize(aijmkl->csrA);
159     if (stat != SPARSE_STATUS_SUCCESS) {
160       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
161       PetscFunctionReturn(PETSC_ERR_LIB);
162     }
163     aijmkl->sparse_optimized = PETSC_TRUE;
164   }
165 
166   PetscFunctionReturn(0);
167 #endif
168 }
169 
170 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
171 {
172   PetscErrorCode ierr;
173   Mat_SeqAIJMKL *aijmkl;
174   Mat_SeqAIJMKL *aijmkl_dest;
175 
176   PetscFunctionBegin;
177   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
178   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
179   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
180   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
181   aijmkl_dest->sparse_optimized = PETSC_FALSE;
182   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
187 {
188   PetscErrorCode  ierr;
189   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
190 
191   PetscFunctionBegin;
192   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
193 
194   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
195    * extra information and some different methods, call the AssemblyEnd
196    * routine for a MATSEQAIJ.
197    * I'm not sure if this is the best way to do this, but it avoids
198    * a lot of code duplication. */
199   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
200   ierr         = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
201 
202   /* Now create the MKL sparse handle (if needed; the function checks). */
203   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
204 
205   PetscFunctionReturn(0);
206 }
207 
208 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
209 {
210   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
211   const PetscScalar *x;
212   PetscScalar       *y;
213   const MatScalar   *aa;
214   PetscErrorCode    ierr;
215   PetscInt          m=A->rmap->n;
216   PetscInt          n=A->cmap->n;
217   PetscScalar       alpha = 1.0;
218   PetscScalar       beta = 0.0;
219   const PetscInt    *aj,*ai;
220   char              matdescra[6];
221 
222 
223   /* Variables not in MatMult_SeqAIJ. */
224   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
225 
226   PetscFunctionBegin;
227   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
228   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
229   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
230   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
231   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
232   aa   = a->a;  /* Nonzero elements stored row-by-row. */
233   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
234 
235   /* Call MKL sparse BLAS routine to do the MatMult. */
236   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
237 
238   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
239   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
240   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
244 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
245 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
246 {
247   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
248   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
249   const PetscScalar *x;
250   PetscScalar       *y;
251   PetscErrorCode    ierr;
252   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
253 
254   PetscFunctionBegin;
255 
256   /* If there are no nonzero entries, zero yy and return immediately. */
257   if(!a->nz) {
258     PetscInt i;
259     PetscInt m=A->rmap->n;
260     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
261     for (i=0; i<m; i++) {
262       y[i] = 0.0;
263     }
264     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
265     PetscFunctionReturn(0);
266   }
267 
268   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
269   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
270 
271   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
272    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
273    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
274   if (!aijmkl->sparse_optimized) {
275     MatSeqAIJMKL_create_mkl_handle(A);
276   }
277 
278   /* Call MKL SpMV2 executor routine to do the MatMult. */
279   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
280 
281   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
282   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
283   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
284   if (stat != SPARSE_STATUS_SUCCESS) {
285     PetscFunctionReturn(PETSC_ERR_LIB);
286   }
287   PetscFunctionReturn(0);
288 }
289 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
290 
291 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
292 {
293   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
294   const PetscScalar *x;
295   PetscScalar       *y;
296   const MatScalar   *aa;
297   PetscErrorCode    ierr;
298   PetscInt          m=A->rmap->n;
299   PetscInt          n=A->cmap->n;
300   PetscScalar       alpha = 1.0;
301   PetscScalar       beta = 0.0;
302   const PetscInt    *aj,*ai;
303   char              matdescra[6];
304 
305   /* Variables not in MatMultTranspose_SeqAIJ. */
306   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
307 
308   PetscFunctionBegin;
309   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
310   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
311   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
312   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
313   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
314   aa   = a->a;  /* Nonzero elements stored row-by-row. */
315   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
316 
317   /* Call MKL sparse BLAS routine to do the MatMult. */
318   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
319 
320   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
321   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
322   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
323   PetscFunctionReturn(0);
324 }
325 
326 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
327 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
328 {
329   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
330   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
331   const PetscScalar *x;
332   PetscScalar       *y;
333   PetscErrorCode    ierr;
334   sparse_status_t   stat;
335 
336   PetscFunctionBegin;
337 
338   /* If there are no nonzero entries, zero yy and return immediately. */
339   if(!a->nz) {
340     PetscInt i;
341     PetscInt n=A->cmap->n;
342     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
343     for (i=0; i<n; i++) {
344       y[i] = 0.0;
345     }
346     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
347     PetscFunctionReturn(0);
348   }
349 
350   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
352 
353   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
354    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
355    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
356   if (!aijmkl->sparse_optimized) {
357     MatSeqAIJMKL_create_mkl_handle(A);
358   }
359 
360   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
361   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
362 
363   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
364   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
365   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
366   if (stat != SPARSE_STATUS_SUCCESS) {
367     PetscFunctionReturn(PETSC_ERR_LIB);
368   }
369   PetscFunctionReturn(0);
370 }
371 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
372 
373 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
374 {
375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
376   const PetscScalar *x;
377   PetscScalar       *y,*z;
378   const MatScalar   *aa;
379   PetscErrorCode    ierr;
380   PetscInt          m=A->rmap->n;
381   PetscInt          n=A->cmap->n;
382   const PetscInt    *aj,*ai;
383   PetscInt          i;
384 
385   /* Variables not in MatMultAdd_SeqAIJ. */
386   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
387   PetscScalar       alpha = 1.0;
388   PetscScalar       beta;
389   char              matdescra[6];
390 
391   PetscFunctionBegin;
392   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
393   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
394 
395   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
397   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
398   aa   = a->a;  /* Nonzero elements stored row-by-row. */
399   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
400 
401   /* Call MKL sparse BLAS routine to do the MatMult. */
402   if (zz == yy) {
403     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
404     beta = 1.0;
405     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
406   } else {
407     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
408      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
409     beta = 0.0;
410     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
411     for (i=0; i<m; i++) {
412       z[i] += y[i];
413     }
414   }
415 
416   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
417   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
418   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
423 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
424 {
425   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
426   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
427   const PetscScalar *x;
428   PetscScalar       *y,*z;
429   PetscErrorCode    ierr;
430   PetscInt          m=A->rmap->n;
431   PetscInt          i;
432 
433   /* Variables not in MatMultAdd_SeqAIJ. */
434   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
435 
436   PetscFunctionBegin;
437 
438   /* If there are no nonzero entries, set zz = yy and return immediately. */
439   if(!a->nz) {
440     PetscInt i;
441     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
442     for (i=0; i<m; i++) {
443       z[i] = y[i];
444     }
445     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
446     PetscFunctionReturn(0);
447   }
448 
449   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
451 
452   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
453    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
454    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
455   if (!aijmkl->sparse_optimized) {
456     MatSeqAIJMKL_create_mkl_handle(A);
457   }
458 
459   /* Call MKL sparse BLAS routine to do the MatMult. */
460   if (zz == yy) {
461     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
462      * with alpha and beta both set to 1.0. */
463     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
464   } else {
465     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
466      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
467     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
468     for (i=0; i<m; i++) {
469       z[i] += y[i];
470     }
471   }
472 
473   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
474   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
475   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
476   if (stat != SPARSE_STATUS_SUCCESS) {
477     PetscFunctionReturn(PETSC_ERR_LIB);
478   }
479   PetscFunctionReturn(0);
480 }
481 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
482 
483 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
484 {
485   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
486   const PetscScalar *x;
487   PetscScalar       *y,*z;
488   const MatScalar   *aa;
489   PetscErrorCode    ierr;
490   PetscInt          m=A->rmap->n;
491   PetscInt          n=A->cmap->n;
492   const PetscInt    *aj,*ai;
493   PetscInt          i;
494 
495   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
496   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
497   PetscScalar       alpha = 1.0;
498   PetscScalar       beta;
499   char              matdescra[6];
500 
501   PetscFunctionBegin;
502   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
503   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
504 
505   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
507   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
508   aa   = a->a;  /* Nonzero elements stored row-by-row. */
509   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
510 
511   /* Call MKL sparse BLAS routine to do the MatMult. */
512   if (zz == yy) {
513     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
514     beta = 1.0;
515     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
516   } else {
517     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
518      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
519     beta = 0.0;
520     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
521     for (i=0; i<n; i++) {
522       z[i] += y[i];
523     }
524   }
525 
526   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
527   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
533 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
534 {
535   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
536   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
537   const PetscScalar *x;
538   PetscScalar       *y,*z;
539   PetscErrorCode    ierr;
540   PetscInt          n=A->cmap->n;
541   PetscInt          i;
542 
543   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
544   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
545 
546   PetscFunctionBegin;
547 
548   /* If there are no nonzero entries, set zz = yy and return immediately. */
549   if(!a->nz) {
550     PetscInt i;
551     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
552     for (i=0; i<n; i++) {
553       z[i] = y[i];
554     }
555     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
556     PetscFunctionReturn(0);
557   }
558 
559   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
560   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
561 
562   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
563    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
564    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
565   if (!aijmkl->sparse_optimized) {
566     MatSeqAIJMKL_create_mkl_handle(A);
567   }
568 
569   /* Call MKL sparse BLAS routine to do the MatMult. */
570   if (zz == yy) {
571     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
572      * with alpha and beta both set to 1.0. */
573     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
574   } else {
575     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
576      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
577     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
578     for (i=0; i<n; i++) {
579       z[i] += y[i];
580     }
581   }
582 
583   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
584   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
585   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
586   if (stat != SPARSE_STATUS_SUCCESS) {
587     PetscFunctionReturn(PETSC_ERR_LIB);
588   }
589   PetscFunctionReturn(0);
590 }
591 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
592 
593 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
594 {
595   PetscErrorCode ierr;
596 
597   PetscFunctionBegin;
598   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
599   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
604 {
605   PetscErrorCode ierr;
606 
607   PetscFunctionBegin;
608   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
609   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
614 {
615   PetscErrorCode ierr;
616 
617   PetscFunctionBegin;
618   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
619   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
624 {
625   PetscErrorCode ierr;
626 
627   PetscFunctionBegin;
628   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
629   if (str == SAME_NONZERO_PATTERN) {
630     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
631     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
637  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
638  * routine, but can also be used to convert an assembled SeqAIJ matrix
639  * into a SeqAIJMKL one. */
640 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
641 {
642   PetscErrorCode ierr;
643   Mat            B = *newmat;
644   Mat_SeqAIJMKL  *aijmkl;
645   PetscBool      set;
646   PetscBool      sametype;
647 
648   PetscFunctionBegin;
649   if (reuse == MAT_INITIAL_MATRIX) {
650     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
651   }
652 
653   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
654   if (sametype) PetscFunctionReturn(0);
655 
656   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
657   B->spptr = (void*) aijmkl;
658 
659   /* Set function pointers for methods that we inherit from AIJ but override.
660    * We also parse some command line options below, since those determine some of the methods we point to. */
661   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
662   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
663   B->ops->destroy          = MatDestroy_SeqAIJMKL;
664 
665   aijmkl->sparse_optimized = PETSC_FALSE;
666 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
667   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
668 #else
669   aijmkl->no_SpMV2 = PETSC_TRUE;
670 #endif
671 
672   /* Parse command line options. */
673   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
674   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
675   ierr = PetscOptionsEnd();CHKERRQ(ierr);
676 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
677   if(!aijmkl->no_SpMV2) {
678     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
679     aijmkl->no_SpMV2 = PETSC_TRUE;
680   }
681 #endif
682 
683   if(!aijmkl->no_SpMV2) {
684 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
685     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
686     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
687     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
688     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
689 #endif
690   } else {
691     B->ops->mult             = MatMult_SeqAIJMKL;
692     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
693     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
694     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
695   }
696 
697   B->ops->scale              = MatScale_SeqAIJMKL;
698   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
699   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
700   B->ops->axpy               = MatAXPY_SeqAIJMKL;
701 
702   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
703   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
704   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
705   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
706   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
707 
708   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
709   *newmat = B;
710   PetscFunctionReturn(0);
711 }
712 
713 /*@C
714    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
715    This type inherits from AIJ and is largely identical, but uses sparse BLAS
716    routines from Intel MKL whenever possible.
717    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
718    operations are currently supported.
719    If the installed version of MKL supports the "SpMV2" sparse
720    inspector-executor routines, then those are used by default.
721 
722    Collective on MPI_Comm
723 
724    Input Parameters:
725 +  comm - MPI communicator, set to PETSC_COMM_SELF
726 .  m - number of rows
727 .  n - number of columns
728 .  nz - number of nonzeros per row (same for all rows)
729 -  nnz - array containing the number of nonzeros in the various rows
730          (possibly different for each row) or NULL
731 
732    Output Parameter:
733 .  A - the matrix
734 
735    Options Database Keys:
736 .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
737 
738    Notes:
739    If nnz is given then nz is ignored
740 
741    Level: intermediate
742 
743 .keywords: matrix, MKL, sparse, parallel
744 
745 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
746 @*/
747 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
748 {
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatCreate(comm,A);CHKERRQ(ierr);
753   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
754   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
755   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
760 {
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
765   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768