xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 7225e97a43487a62bc92b33f0ee10d2b5383a2bb)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
52   B->ops->scale            = MatScale_SeqAIJ;
53   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
54   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
55   B->ops->axpy             = MatAXPY_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61   if(!aijmkl->no_SpMV2) {
62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
63     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
65 #endif
66   }
67 
68   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
69    * simply involves destroying the MKL sparse matrix handle and then freeing
70    * the spptr pointer. */
71 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
72   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
73 
74   if (aijmkl->sparse_optimized) {
75     sparse_status_t stat;
76     stat = mkl_sparse_destroy(aijmkl->csrA);
77     if (stat != SPARSE_STATUS_SUCCESS) {
78       PetscFunctionReturn(PETSC_ERR_LIB);
79     }
80   }
81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
82   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
83 
84   /* Change the type of B to MATSEQAIJ. */
85   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
86 
87   *newmat = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
92 {
93   PetscErrorCode ierr;
94   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
95 
96   PetscFunctionBegin;
97 
98   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
99    * spptr pointer. */
100   if (aijmkl) {
101     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
102 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
103     if (aijmkl->sparse_optimized) {
104       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
105       stat = mkl_sparse_destroy(aijmkl->csrA);
106       if (stat != SPARSE_STATUS_SUCCESS) {
107         PetscFunctionReturn(PETSC_ERR_LIB);
108       }
109     }
110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
111     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
112   }
113 
114   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
115    * to destroy everything that remains. */
116   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
117   /* Note that I don't call MatSetType().  I believe this is because that
118    * is only to be called when *building* a matrix.  I could be wrong, but
119    * that is how things work for the SuperLU matrix class. */
120   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
125  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
126  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
127  * handle, creates a new one, and then calls mkl_sparse_optimize().
128  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
129  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
130  * an unoptimized matrix handle here. */
131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
132 {
133 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
134   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
135    * does nothing. We make it callable anyway in this case because it cuts
136    * down on littering the code with #ifdefs. */
137   PetscFunctionBegin;
138   PetscFunctionReturn(0);
139 #else
140   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
141   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
142   PetscInt        m,n;
143   MatScalar       *aa;
144   PetscInt        *aj,*ai;
145   sparse_status_t stat;
146 
147   PetscFunctionBegin;
148   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
149 
150   if (aijmkl->sparse_optimized) {
151     /* Matrix has been previously assembled and optimized. Must destroy old
152      * matrix handle before running the optimization step again. */
153     stat = mkl_sparse_destroy(aijmkl->csrA);
154     if (stat != SPARSE_STATUS_SUCCESS) {
155       PetscFunctionReturn(PETSC_ERR_LIB);
156     }
157   }
158   aijmkl->sparse_optimized = PETSC_FALSE;
159 
160   /* Now perform the SpMV2 setup and matrix optimization. */
161   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
162   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
163   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
164   m = A->rmap->n;
165   n = A->cmap->n;
166   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
167   aa   = a->a;  /* Nonzero elements stored row-by-row. */
168   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
169   if ((a->nz!=0) & !(A->structure_only)) {
170     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
171      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
172     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
173     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
174     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175     stat = mkl_sparse_optimize(aijmkl->csrA);
176     if (stat != SPARSE_STATUS_SUCCESS) {
177       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
178       PetscFunctionReturn(PETSC_ERR_LIB);
179     }
180     aijmkl->sparse_optimized = PETSC_TRUE;
181   }
182 
183   PetscFunctionReturn(0);
184 #endif
185 }
186 
187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
188  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
189  * matrix handle.
190  * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity
191  * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult()
192  * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case.
193  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
194  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
195  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
196  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
197 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
199 {
200   PetscErrorCode ierr;
201   sparse_status_t stat;
202   sparse_index_base_t indexing;
203   PetscInt nrows, ncols;
204   PetscInt *aj,*ai,*dummy;
205   MatScalar *aa;
206   Mat A;
207   Mat_SeqAIJ *a;
208   Mat_SeqAIJMKL *aijmkl;
209 
210   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
211   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
212   if (stat != SPARSE_STATUS_SUCCESS) {
213     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
214     PetscFunctionReturn(PETSC_ERR_LIB);
215   }
216 
217   if (reuse == MAT_INITIAL_MATRIX) {
218     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
219     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
220     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
221     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
222 
223     /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
224      * Now turn it into a MATSEQAIJMKL. */
225     ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
226   } else {
227     A = *mat;
228   }
229 
230   a = (Mat_SeqAIJ*)A->data;
231   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
232 
233   if (reuse == MAT_REUSE_MATRIX) {
234     /* Need to destroy old MKL handle. */
235     stat = mkl_sparse_destroy(aijmkl->csrA);
236     if (stat != SPARSE_STATUS_SUCCESS) {
237       PetscFunctionReturn(PETSC_ERR_LIB);
238     }
239 
240     /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */
241     ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr);
242   }
243   aijmkl->csrA = csrA;
244 
245   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
246    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
247    * and just need to be able to run the MKL optimization step. */
248   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
249   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
250   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
251   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
252   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
253   stat = mkl_sparse_optimize(aijmkl->csrA);
254   if (stat != SPARSE_STATUS_SUCCESS) {
255     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
256     PetscFunctionReturn(PETSC_ERR_LIB);
257   }
258   aijmkl->sparse_optimized = PETSC_TRUE;
259 
260   *mat = A;
261   PetscFunctionReturn(0);
262 }
263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
264 
265 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
266 {
267   PetscErrorCode ierr;
268   Mat_SeqAIJMKL *aijmkl;
269   Mat_SeqAIJMKL *aijmkl_dest;
270 
271   PetscFunctionBegin;
272   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
273   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
274   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
275   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
276   aijmkl_dest->sparse_optimized = PETSC_FALSE;
277   if (aijmkl->eager_inspection) {
278     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
284 {
285   PetscErrorCode  ierr;
286   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
287   Mat_SeqAIJMKL *aijmkl;
288 
289   PetscFunctionBegin;
290   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
291 
292   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
293    * extra information and some different methods, call the AssemblyEnd
294    * routine for a MATSEQAIJ.
295    * I'm not sure if this is the best way to do this, but it avoids
296    * a lot of code duplication. */
297   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
298   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
299 
300   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
301    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
302   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
303   if (aijmkl->eager_inspection) {
304     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
305   } else if (aijmkl->sparse_optimized) {
306     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
307      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
308      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
309      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
310     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
311     if (stat != SPARSE_STATUS_SUCCESS) {
312       PetscFunctionReturn(PETSC_ERR_LIB);
313     }
314     aijmkl->sparse_optimized = PETSC_FALSE;
315   }
316 
317   PetscFunctionReturn(0);
318 }
319 
320 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
321 {
322   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
323   const PetscScalar *x;
324   PetscScalar       *y;
325   const MatScalar   *aa;
326   PetscErrorCode    ierr;
327   PetscInt          m=A->rmap->n;
328   PetscInt          n=A->cmap->n;
329   PetscScalar       alpha = 1.0;
330   PetscScalar       beta = 0.0;
331   const PetscInt    *aj,*ai;
332   char              matdescra[6];
333 
334 
335   /* Variables not in MatMult_SeqAIJ. */
336   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
337 
338   PetscFunctionBegin;
339   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
340   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
341   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
342   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
343   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
344   aa   = a->a;  /* Nonzero elements stored row-by-row. */
345   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
346 
347   /* Call MKL sparse BLAS routine to do the MatMult. */
348   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
349 
350   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
351   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
352   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
353   PetscFunctionReturn(0);
354 }
355 
356 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
357 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
358 {
359   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
360   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
361   const PetscScalar *x;
362   PetscScalar       *y;
363   PetscErrorCode    ierr;
364   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
365 
366   PetscFunctionBegin;
367 
368   /* If there are no nonzero entries, zero yy and return immediately. */
369   if(!a->nz) {
370     PetscInt i;
371     PetscInt m=A->rmap->n;
372     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
373     for (i=0; i<m; i++) {
374       y[i] = 0.0;
375     }
376     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
377     PetscFunctionReturn(0);
378   }
379 
380   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
381   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
382 
383   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
384    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
385    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
386   if (!aijmkl->sparse_optimized) {
387     MatSeqAIJMKL_create_mkl_handle(A);
388   }
389 
390   /* Call MKL SpMV2 executor routine to do the MatMult. */
391   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
392 
393   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
394   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
395   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
396   if (stat != SPARSE_STATUS_SUCCESS) {
397     PetscFunctionReturn(PETSC_ERR_LIB);
398   }
399   PetscFunctionReturn(0);
400 }
401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402 
403 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
404 {
405   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
406   const PetscScalar *x;
407   PetscScalar       *y;
408   const MatScalar   *aa;
409   PetscErrorCode    ierr;
410   PetscInt          m=A->rmap->n;
411   PetscInt          n=A->cmap->n;
412   PetscScalar       alpha = 1.0;
413   PetscScalar       beta = 0.0;
414   const PetscInt    *aj,*ai;
415   char              matdescra[6];
416 
417   /* Variables not in MatMultTranspose_SeqAIJ. */
418   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
419 
420   PetscFunctionBegin;
421   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
422   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
423   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
424   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
425   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
426   aa   = a->a;  /* Nonzero elements stored row-by-row. */
427   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
428 
429   /* Call MKL sparse BLAS routine to do the MatMult. */
430   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
431 
432   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
433   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
434   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
440 {
441   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
442   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
443   const PetscScalar *x;
444   PetscScalar       *y;
445   PetscErrorCode    ierr;
446   sparse_status_t   stat;
447 
448   PetscFunctionBegin;
449 
450   /* If there are no nonzero entries, zero yy and return immediately. */
451   if(!a->nz) {
452     PetscInt i;
453     PetscInt n=A->cmap->n;
454     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
455     for (i=0; i<n; i++) {
456       y[i] = 0.0;
457     }
458     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
459     PetscFunctionReturn(0);
460   }
461 
462   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
463   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
464 
465   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
466    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
467    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
468   if (!aijmkl->sparse_optimized) {
469     MatSeqAIJMKL_create_mkl_handle(A);
470   }
471 
472   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
474 
475   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
476   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
477   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
478   if (stat != SPARSE_STATUS_SUCCESS) {
479     PetscFunctionReturn(PETSC_ERR_LIB);
480   }
481   PetscFunctionReturn(0);
482 }
483 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
484 
485 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
488   const PetscScalar *x;
489   PetscScalar       *y,*z;
490   const MatScalar   *aa;
491   PetscErrorCode    ierr;
492   PetscInt          m=A->rmap->n;
493   PetscInt          n=A->cmap->n;
494   const PetscInt    *aj,*ai;
495   PetscInt          i;
496 
497   /* Variables not in MatMultAdd_SeqAIJ. */
498   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
499   PetscScalar       alpha = 1.0;
500   PetscScalar       beta;
501   char              matdescra[6];
502 
503   PetscFunctionBegin;
504   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
505   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
506 
507   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
508   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
509   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
510   aa   = a->a;  /* Nonzero elements stored row-by-row. */
511   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
512 
513   /* Call MKL sparse BLAS routine to do the MatMult. */
514   if (zz == yy) {
515     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
516     beta = 1.0;
517     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
518   } else {
519     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
520      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
521     beta = 0.0;
522     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
523     for (i=0; i<m; i++) {
524       z[i] += y[i];
525     }
526   }
527 
528   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
529   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
530   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
531   PetscFunctionReturn(0);
532 }
533 
534 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
535 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
536 {
537   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
538   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
539   const PetscScalar *x;
540   PetscScalar       *y,*z;
541   PetscErrorCode    ierr;
542   PetscInt          m=A->rmap->n;
543   PetscInt          i;
544 
545   /* Variables not in MatMultAdd_SeqAIJ. */
546   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
547 
548   PetscFunctionBegin;
549 
550   /* If there are no nonzero entries, set zz = yy and return immediately. */
551   if(!a->nz) {
552     PetscInt i;
553     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
554     for (i=0; i<m; i++) {
555       z[i] = y[i];
556     }
557     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
558     PetscFunctionReturn(0);
559   }
560 
561   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
562   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
563 
564   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
565    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
566    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
567   if (!aijmkl->sparse_optimized) {
568     MatSeqAIJMKL_create_mkl_handle(A);
569   }
570 
571   /* Call MKL sparse BLAS routine to do the MatMult. */
572   if (zz == yy) {
573     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
574      * with alpha and beta both set to 1.0. */
575     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
576   } else {
577     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
578      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
579     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
580     for (i=0; i<m; i++) {
581       z[i] += y[i];
582     }
583   }
584 
585   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
586   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
587   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
588   if (stat != SPARSE_STATUS_SUCCESS) {
589     PetscFunctionReturn(PETSC_ERR_LIB);
590   }
591   PetscFunctionReturn(0);
592 }
593 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
594 
595 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
598   const PetscScalar *x;
599   PetscScalar       *y,*z;
600   const MatScalar   *aa;
601   PetscErrorCode    ierr;
602   PetscInt          m=A->rmap->n;
603   PetscInt          n=A->cmap->n;
604   const PetscInt    *aj,*ai;
605   PetscInt          i;
606 
607   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
608   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
609   PetscScalar       alpha = 1.0;
610   PetscScalar       beta;
611   char              matdescra[6];
612 
613   PetscFunctionBegin;
614   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
615   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
616 
617   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
618   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
619   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
620   aa   = a->a;  /* Nonzero elements stored row-by-row. */
621   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
622 
623   /* Call MKL sparse BLAS routine to do the MatMult. */
624   if (zz == yy) {
625     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
626     beta = 1.0;
627     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
628   } else {
629     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
630      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
631     beta = 0.0;
632     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
633     for (i=0; i<n; i++) {
634       z[i] += y[i];
635     }
636   }
637 
638   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
639   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
640   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
645 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
646 {
647   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
648   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
649   const PetscScalar *x;
650   PetscScalar       *y,*z;
651   PetscErrorCode    ierr;
652   PetscInt          n=A->cmap->n;
653   PetscInt          i;
654 
655   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
656   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
657 
658   PetscFunctionBegin;
659 
660   /* If there are no nonzero entries, set zz = yy and return immediately. */
661   if(!a->nz) {
662     PetscInt i;
663     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
664     for (i=0; i<n; i++) {
665       z[i] = y[i];
666     }
667     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
668     PetscFunctionReturn(0);
669   }
670 
671   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
672   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
673 
674   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
675    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
676    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
677   if (!aijmkl->sparse_optimized) {
678     MatSeqAIJMKL_create_mkl_handle(A);
679   }
680 
681   /* Call MKL sparse BLAS routine to do the MatMult. */
682   if (zz == yy) {
683     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
684      * with alpha and beta both set to 1.0. */
685     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
686   } else {
687     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
688      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
689     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
690     for (i=0; i<n; i++) {
691       z[i] += y[i];
692     }
693   }
694 
695   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
696   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
697   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
698   if (stat != SPARSE_STATUS_SUCCESS) {
699     PetscFunctionReturn(PETSC_ERR_LIB);
700   }
701   PetscFunctionReturn(0);
702 }
703 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
704 
705 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
706 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
707 {
708   Mat_SeqAIJMKL *a, *b;
709   sparse_matrix_t csrA, csrB, csrC;
710   PetscErrorCode ierr;
711   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
712 
713   PetscFunctionBegin;
714   a = (Mat_SeqAIJMKL*)A->spptr;
715   b = (Mat_SeqAIJMKL*)B->spptr;
716   if (!a->sparse_optimized) {
717     MatSeqAIJMKL_create_mkl_handle(A);
718   }
719   if (!b->sparse_optimized) {
720     MatSeqAIJMKL_create_mkl_handle(B);
721   }
722   csrA = a->csrA;
723   csrB = b->csrA;
724 
725   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
726   if (stat != SPARSE_STATUS_SUCCESS) {
727     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
728     PetscFunctionReturn(PETSC_ERR_LIB);
729   }
730 
731   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
732 
733   PetscFunctionReturn(0);
734 }
735 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
736 
737 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
738 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
739 {
740   Mat_SeqAIJMKL *a, *b;
741   sparse_matrix_t csrA, csrB, csrC;
742   PetscErrorCode ierr;
743   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
744 
745   PetscFunctionBegin;
746   a = (Mat_SeqAIJMKL*)A->spptr;
747   b = (Mat_SeqAIJMKL*)B->spptr;
748   if (!a->sparse_optimized) {
749     MatSeqAIJMKL_create_mkl_handle(A);
750   }
751   if (!b->sparse_optimized) {
752     MatSeqAIJMKL_create_mkl_handle(B);
753   }
754   csrA = a->csrA;
755   csrB = b->csrA;
756 
757   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
758   if (stat != SPARSE_STATUS_SUCCESS) {
759     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
760     PetscFunctionReturn(PETSC_ERR_LIB);
761   }
762 
763   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
764 
765   PetscFunctionReturn(0);
766 }
767 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
768 
769 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
770 {
771   PetscErrorCode ierr;
772 
773   PetscFunctionBegin;
774   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
775   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
780 {
781   PetscErrorCode ierr;
782 
783   PetscFunctionBegin;
784   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
785   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
790 {
791   PetscErrorCode ierr;
792 
793   PetscFunctionBegin;
794   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
795   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
796   PetscFunctionReturn(0);
797 }
798 
799 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
800 {
801   PetscErrorCode ierr;
802 
803   PetscFunctionBegin;
804   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
805   if (str == SAME_NONZERO_PATTERN) {
806     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
807     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
808   }
809   PetscFunctionReturn(0);
810 }
811 
812 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
813  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
814  * routine, but can also be used to convert an assembled SeqAIJ matrix
815  * into a SeqAIJMKL one. */
816 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
817 {
818   PetscErrorCode ierr;
819   Mat            B = *newmat;
820   Mat_SeqAIJMKL  *aijmkl;
821   PetscBool      set;
822   PetscBool      sametype;
823 
824   PetscFunctionBegin;
825   if (reuse == MAT_INITIAL_MATRIX) {
826     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
827   }
828 
829   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
830   if (sametype) PetscFunctionReturn(0);
831 
832   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
833   B->spptr = (void*) aijmkl;
834 
835   /* Set function pointers for methods that we inherit from AIJ but override.
836    * We also parse some command line options below, since those determine some of the methods we point to. */
837   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
838   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
839   B->ops->destroy          = MatDestroy_SeqAIJMKL;
840 
841   aijmkl->sparse_optimized = PETSC_FALSE;
842 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
843   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
844 #else
845   aijmkl->no_SpMV2 = PETSC_TRUE;
846 #endif
847   aijmkl->eager_inspection = PETSC_FALSE;
848 
849   /* Parse command line options. */
850   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
851   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
852   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
853   ierr = PetscOptionsEnd();CHKERRQ(ierr);
854 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
855   if(!aijmkl->no_SpMV2) {
856     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
857     aijmkl->no_SpMV2 = PETSC_TRUE;
858   }
859 #endif
860 
861   if(!aijmkl->no_SpMV2) {
862 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
863     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
864     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
865     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
866     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
867     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
868     B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
869 #endif
870   } else {
871     B->ops->mult             = MatMult_SeqAIJMKL;
872     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
873     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
874     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
875   }
876 
877   B->ops->scale              = MatScale_SeqAIJMKL;
878   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
879   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
880   B->ops->axpy               = MatAXPY_SeqAIJMKL;
881 
882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
887   if(!aijmkl->no_SpMV2) {
888 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
889     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
890     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
891 #endif
892   }
893 
894   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
895   *newmat = B;
896   PetscFunctionReturn(0);
897 }
898 
899 /*@C
900    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
901    This type inherits from AIJ and is largely identical, but uses sparse BLAS
902    routines from Intel MKL whenever possible.
903    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
904    operations are currently supported.
905    If the installed version of MKL supports the "SpMV2" sparse
906    inspector-executor routines, then those are used by default.
907 
908    Collective on MPI_Comm
909 
910    Input Parameters:
911 +  comm - MPI communicator, set to PETSC_COMM_SELF
912 .  m - number of rows
913 .  n - number of columns
914 .  nz - number of nonzeros per row (same for all rows)
915 -  nnz - array containing the number of nonzeros in the various rows
916          (possibly different for each row) or NULL
917 
918    Output Parameter:
919 .  A - the matrix
920 
921    Options Database Keys:
922 .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
923 
924    Notes:
925    If nnz is given then nz is ignored
926 
927    Level: intermediate
928 
929 .keywords: matrix, MKL, sparse, parallel
930 
931 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
932 @*/
933 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
934 {
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   ierr = MatCreate(comm,A);CHKERRQ(ierr);
939   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
940   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
941   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
946 {
947   PetscErrorCode ierr;
948 
949   PetscFunctionBegin;
950   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
951   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954