xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 431879ec409417d744fb9e7bc2d5f08b9a65cd7f)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmultsymbolic  = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53 
54   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
55 
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58    * simply involves destroying the MKL sparse matrix handle and then freeing
59    * the spptr pointer. */
60   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61 
62   if (aijmkl->sparse_optimized) {
63     sparse_status_t stat;
64     stat = mkl_sparse_destroy(aijmkl->csrA);
65     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
66   }
67 #endif
68   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
69 
70   /* Change the type of B to MATSEQAIJ. */
71   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
72 
73   *newmat = B;
74   PetscFunctionReturn(0);
75 }
76 
77 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
78 {
79   PetscErrorCode ierr;
80   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
81 
82   PetscFunctionBegin;
83 
84   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
85    * spptr pointer. */
86   if (aijmkl) {
87     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
88 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
89     if (aijmkl->sparse_optimized) {
90       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
91       stat = mkl_sparse_destroy(aijmkl->csrA);
92       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
93     }
94 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
95     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
96   }
97 
98   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
99    * to destroy everything that remains. */
100   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
101   /* Note that I don't call MatSetType().  I believe this is because that
102    * is only to be called when *building* a matrix.  I could be wrong, but
103    * that is how things work for the SuperLU matrix class. */
104   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
109  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
110  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
111  * handle, creates a new one, and then calls mkl_sparse_optimize().
112  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
113  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
114  * an unoptimized matrix handle here. */
115 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
116 {
117 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
118   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
119    * does nothing. We make it callable anyway in this case because it cuts
120    * down on littering the code with #ifdefs. */
121   PetscFunctionBegin;
122   PetscFunctionReturn(0);
123 #else
124   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
125   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
126   PetscInt         m,n;
127   MatScalar        *aa;
128   PetscInt         *aj,*ai;
129   sparse_status_t  stat;
130   PetscErrorCode   ierr;
131 
132   PetscFunctionBegin;
133 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
134   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
135    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
136    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
137    * mkl_sparse_optimize() later. */
138   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
139 #endif
140 
141   if (aijmkl->sparse_optimized) {
142     /* Matrix has been previously assembled and optimized. Must destroy old
143      * matrix handle before running the optimization step again. */
144     stat = mkl_sparse_destroy(aijmkl->csrA);
145     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
146   }
147   aijmkl->sparse_optimized = PETSC_FALSE;
148 
149   /* Now perform the SpMV2 setup and matrix optimization. */
150   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
151   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
152   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
153   m = A->rmap->n;
154   n = A->cmap->n;
155   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
156   aa   = a->a;  /* Nonzero elements stored row-by-row. */
157   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
158   if ((a->nz!=0) && aa && !(A->structure_only)) {
159     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
160      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
161     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
162     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
163     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
164     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
165     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
166     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
167     if (!aijmkl->no_SpMV2) {
168       stat = mkl_sparse_optimize(aijmkl->csrA);
169       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
170     }
171     aijmkl->sparse_optimized = PETSC_TRUE;
172     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
173   }
174 
175   PetscFunctionReturn(0);
176 #endif
177 }
178 
179 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
180  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
181  * matrix handle.
182  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
183  * there is no good alternative. */
184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
185 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
186 {
187   PetscErrorCode      ierr;
188   sparse_status_t     stat;
189   sparse_index_base_t indexing;
190   PetscInt            nrows, ncols;
191   PetscInt            *aj,*ai,*dummy;
192   MatScalar           *aa;
193   Mat                 A;
194   Mat_SeqAIJMKL       *aijmkl;
195 
196   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
197   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
198   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
199 
200   if (reuse == MAT_REUSE_MATRIX) {
201     ierr = MatDestroy(mat);CHKERRQ(ierr);
202   }
203   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
204   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
205   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
206   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
207    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
208    * they will be destroyed when the MKL handle is destroyed.
209    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
210   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
211 
212   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
213    * Now turn it into a MATSEQAIJMKL. */
214   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
215 
216   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
217   aijmkl->csrA = csrA;
218 
219   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
220    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
221    * and just need to be able to run the MKL optimization step. */
222   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
223   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
224   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
225   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
226   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
227   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
228   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
229   if (!aijmkl->no_SpMV2) {
230     stat = mkl_sparse_optimize(aijmkl->csrA);
231     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
232   }
233   aijmkl->sparse_optimized = PETSC_TRUE;
234   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
235 
236   *mat = A;
237   PetscFunctionReturn(0);
238 }
239 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
240 
241 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
242  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
243  * MatMatMultNumeric(). */
244 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
245 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
246 {
247   PetscInt            i;
248   PetscInt            nrows,ncols;
249   PetscInt            nz;
250   PetscInt            *ai,*aj,*dummy;
251   PetscScalar         *aa;
252   PetscErrorCode      ierr;
253   Mat_SeqAIJMKL       *aijmkl;
254   sparse_status_t     stat;
255   sparse_index_base_t indexing;
256 
257   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
258 
259   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
260   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
261   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
262 
263   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
264    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
265   for (i=0; i<nrows; i++) {
266     nz = ai[i+1] - ai[i];
267     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
268   }
269 
270   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272 
273   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
274   /* We mark our matrix as having a valid, optimized MKL handle.
275    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
276   aijmkl->sparse_optimized = PETSC_TRUE;
277 
278   PetscFunctionReturn(0);
279 }
280 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
281 
282 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
283 {
284   PetscErrorCode ierr;
285   Mat_SeqAIJMKL  *aijmkl;
286   Mat_SeqAIJMKL  *aijmkl_dest;
287 
288   PetscFunctionBegin;
289   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
290   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
291   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
292   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
293   aijmkl_dest->sparse_optimized = PETSC_FALSE;
294   if (aijmkl->eager_inspection) {
295     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
301 {
302   PetscErrorCode  ierr;
303   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
304   Mat_SeqAIJMKL   *aijmkl;
305 
306   PetscFunctionBegin;
307   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
308 
309   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
310    * extra information and some different methods, call the AssemblyEnd
311    * routine for a MATSEQAIJ.
312    * I'm not sure if this is the best way to do this, but it avoids
313    * a lot of code duplication. */
314   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
315   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
316 
317   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
318    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
319   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
320   if (aijmkl->eager_inspection) {
321     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
322   }
323 
324   PetscFunctionReturn(0);
325 }
326 
327 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
328 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
329 {
330   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
331   const PetscScalar *x;
332   PetscScalar       *y;
333   const MatScalar   *aa;
334   PetscErrorCode    ierr;
335   PetscInt          m=A->rmap->n;
336   PetscInt          n=A->cmap->n;
337   PetscScalar       alpha = 1.0;
338   PetscScalar       beta = 0.0;
339   const PetscInt    *aj,*ai;
340   char              matdescra[6];
341 
342 
343   /* Variables not in MatMult_SeqAIJ. */
344   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
345 
346   PetscFunctionBegin;
347   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
348   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
349   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
350   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
351   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
352   aa   = a->a;  /* Nonzero elements stored row-by-row. */
353   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
354 
355   /* Call MKL sparse BLAS routine to do the MatMult. */
356   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
357 
358   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
359   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
360   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 #endif
364 
365 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
366 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
367 {
368   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
369   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
370   const PetscScalar *x;
371   PetscScalar       *y;
372   PetscErrorCode    ierr;
373   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
374   PetscObjectState  state;
375 
376   PetscFunctionBegin;
377 
378   /* If there are no nonzero entries, zero yy and return immediately. */
379   if(!a->nz) {
380     PetscInt i;
381     PetscInt m=A->rmap->n;
382     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
383     for (i=0; i<m; i++) {
384       y[i] = 0.0;
385     }
386     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
387     PetscFunctionReturn(0);
388   }
389 
390   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
391   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
392 
393   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
394    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
395    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
397   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
398     MatSeqAIJMKL_create_mkl_handle(A);
399   }
400 
401   /* Call MKL SpMV2 executor routine to do the MatMult. */
402   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
403   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
404 
405   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
406   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
407   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
411 
412 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
413 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
414 {
415   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
416   const PetscScalar *x;
417   PetscScalar       *y;
418   const MatScalar   *aa;
419   PetscErrorCode    ierr;
420   PetscInt          m=A->rmap->n;
421   PetscInt          n=A->cmap->n;
422   PetscScalar       alpha = 1.0;
423   PetscScalar       beta = 0.0;
424   const PetscInt    *aj,*ai;
425   char              matdescra[6];
426 
427   /* Variables not in MatMultTranspose_SeqAIJ. */
428   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
429 
430   PetscFunctionBegin;
431   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
432   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
433   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
434   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
435   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
436   aa   = a->a;  /* Nonzero elements stored row-by-row. */
437   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
438 
439   /* Call MKL sparse BLAS routine to do the MatMult. */
440   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
441 
442   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
443   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 #endif
448 
449 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
450 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
451 {
452   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
453   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
454   const PetscScalar *x;
455   PetscScalar       *y;
456   PetscErrorCode    ierr;
457   sparse_status_t   stat;
458   PetscObjectState  state;
459 
460   PetscFunctionBegin;
461 
462   /* If there are no nonzero entries, zero yy and return immediately. */
463   if(!a->nz) {
464     PetscInt i;
465     PetscInt n=A->cmap->n;
466     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
467     for (i=0; i<n; i++) {
468       y[i] = 0.0;
469     }
470     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
471     PetscFunctionReturn(0);
472   }
473 
474   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
475   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
476 
477   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
478    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
479    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
480   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
481   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
482     MatSeqAIJMKL_create_mkl_handle(A);
483   }
484 
485   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
486   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
487   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
488 
489   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
490   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
491   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
495 
496 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
497 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
498 {
499   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
500   const PetscScalar *x;
501   PetscScalar       *y,*z;
502   const MatScalar   *aa;
503   PetscErrorCode    ierr;
504   PetscInt          m=A->rmap->n;
505   PetscInt          n=A->cmap->n;
506   const PetscInt    *aj,*ai;
507   PetscInt          i;
508 
509   /* Variables not in MatMultAdd_SeqAIJ. */
510   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
511   PetscScalar       alpha = 1.0;
512   PetscScalar       beta;
513   char              matdescra[6];
514 
515   PetscFunctionBegin;
516   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
517   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
518 
519   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
520   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
521   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
522   aa   = a->a;  /* Nonzero elements stored row-by-row. */
523   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
524 
525   /* Call MKL sparse BLAS routine to do the MatMult. */
526   if (zz == yy) {
527     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
528     beta = 1.0;
529     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
530   } else {
531     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
532      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
533     beta = 0.0;
534     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
535     for (i=0; i<m; i++) {
536       z[i] += y[i];
537     }
538   }
539 
540   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
541   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
542   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 #endif
546 
547 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
548 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
551   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
552   const PetscScalar *x;
553   PetscScalar       *y,*z;
554   PetscErrorCode    ierr;
555   PetscInt          m=A->rmap->n;
556   PetscInt          i;
557 
558   /* Variables not in MatMultAdd_SeqAIJ. */
559   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
560   PetscObjectState  state;
561 
562   PetscFunctionBegin;
563 
564   /* If there are no nonzero entries, set zz = yy and return immediately. */
565   if(!a->nz) {
566     PetscInt i;
567     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
568     for (i=0; i<m; i++) {
569       z[i] = y[i];
570     }
571     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
572     PetscFunctionReturn(0);
573   }
574 
575   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
576   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
577 
578   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
579    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
580    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
581   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
582   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
583     MatSeqAIJMKL_create_mkl_handle(A);
584   }
585 
586   /* Call MKL sparse BLAS routine to do the MatMult. */
587   if (zz == yy) {
588     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
589      * with alpha and beta both set to 1.0. */
590     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
591     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
592   } else {
593     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
594      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
595     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
596     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
597     for (i=0; i<m; i++) {
598       z[i] += y[i];
599     }
600   }
601 
602   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
603   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
604   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
608 
609 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
610 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
613   const PetscScalar *x;
614   PetscScalar       *y,*z;
615   const MatScalar   *aa;
616   PetscErrorCode    ierr;
617   PetscInt          m=A->rmap->n;
618   PetscInt          n=A->cmap->n;
619   const PetscInt    *aj,*ai;
620   PetscInt          i;
621 
622   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
623   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
624   PetscScalar       alpha = 1.0;
625   PetscScalar       beta;
626   char              matdescra[6];
627 
628   PetscFunctionBegin;
629   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
630   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
631 
632   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
633   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
634   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
635   aa   = a->a;  /* Nonzero elements stored row-by-row. */
636   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
637 
638   /* Call MKL sparse BLAS routine to do the MatMult. */
639   if (zz == yy) {
640     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
641     beta = 1.0;
642     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
643   } else {
644     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
645      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
646     beta = 0.0;
647     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
648     for (i=0; i<n; i++) {
649       z[i] += y[i];
650     }
651   }
652 
653   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
654   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
655   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 #endif
659 
660 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
665   const PetscScalar *x;
666   PetscScalar       *y,*z;
667   PetscErrorCode    ierr;
668   PetscInt          n=A->cmap->n;
669   PetscInt          i;
670   PetscObjectState  state;
671 
672   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
673   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
674 
675   PetscFunctionBegin;
676 
677   /* If there are no nonzero entries, set zz = yy and return immediately. */
678   if(!a->nz) {
679     PetscInt i;
680     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
681     for (i=0; i<n; i++) {
682       z[i] = y[i];
683     }
684     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
685     PetscFunctionReturn(0);
686   }
687 
688   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
689   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
690 
691   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
692    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
693    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
694   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
695   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
696     MatSeqAIJMKL_create_mkl_handle(A);
697   }
698 
699   /* Call MKL sparse BLAS routine to do the MatMult. */
700   if (zz == yy) {
701     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
702      * with alpha and beta both set to 1.0. */
703     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
704     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
705   } else {
706     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
707      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
708     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
709     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
710     for (i=0; i<n; i++) {
711       z[i] += y[i];
712     }
713   }
714 
715   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
716   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
717   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
721 
722 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
723 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,PetscReal fill,Mat C)
724 {
725   Mat_SeqAIJMKL       *a, *b, *c;
726   sparse_matrix_t     csrA, csrB, csrC;
727   PetscErrorCode      ierr;
728   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
729   struct matrix_descr descr_type_gen;
730   PetscObjectState    state;
731 
732   PetscFunctionBegin;
733   a = (Mat_SeqAIJMKL*)A->spptr;
734   b = (Mat_SeqAIJMKL*)B->spptr;
735   c = (Mat_SeqAIJMKL*)C->spptr;
736   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
737   if (!a->sparse_optimized || a->state != state) {
738     MatSeqAIJMKL_create_mkl_handle(A);
739   }
740   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
741   if (!b->sparse_optimized || b->state != state) {
742     MatSeqAIJMKL_create_mkl_handle(B);
743   }
744   csrA = a->csrA;
745   csrB = b->csrA;
746   csrC = c->csrA;
747   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
748 
749   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
750                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
751                          SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
752 
753   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply");
754 
755   PetscFunctionReturn(0);
756 }
757 
758 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
759 {
760   Mat_SeqAIJMKL       *a, *b, *c;
761   sparse_matrix_t     csrA, csrB, csrC;
762   PetscErrorCode      ierr;
763   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
764   struct matrix_descr descr_type_gen;
765   PetscObjectState    state;
766 
767   PetscFunctionBegin;
768   a = (Mat_SeqAIJMKL*)A->spptr;
769   b = (Mat_SeqAIJMKL*)B->spptr;
770   c = (Mat_SeqAIJMKL*)C->spptr;
771   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
772   if (!a->sparse_optimized || a->state != state) {
773     MatSeqAIJMKL_create_mkl_handle(A);
774   }
775   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
776   if (!b->sparse_optimized || b->state != state) {
777     MatSeqAIJMKL_create_mkl_handle(B);
778   }
779   csrA = a->csrA;
780   csrB = b->csrA;
781   csrC = c->csrA;
782   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
783 
784   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
785                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
786                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
787 
788   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
789 
790   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
791   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
792 
793   PetscFunctionReturn(0);
794 }
795 
796 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
797 {
798   Mat_SeqAIJMKL       *a, *p, *c;
799   sparse_matrix_t     csrA, csrP, csrC;
800   PetscBool           set, flag;
801   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
802   struct matrix_descr descr_type_sym;
803   PetscObjectState    state;
804   PetscErrorCode      ierr;
805 
806   PetscFunctionBegin;
807   ierr = MatIsSymmetricKnown(A,&set,&flag);
808   if (!set || (set && !flag)) {
809     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
810     PetscFunctionReturn(0);
811   }
812 
813   a = (Mat_SeqAIJMKL*)A->spptr;
814   p = (Mat_SeqAIJMKL*)P->spptr;
815   c = (Mat_SeqAIJMKL*)C->spptr;
816   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
817   if (!a->sparse_optimized || a->state != state) {
818     MatSeqAIJMKL_create_mkl_handle(A);
819   }
820   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
821   if (!p->sparse_optimized || p->state != state) {
822     MatSeqAIJMKL_create_mkl_handle(P);
823   }
824   csrA = a->csrA;
825   csrP = p->csrA;
826   csrC = c->csrA;
827   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
828   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
829   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
830 
831   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
832   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
833   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
834 
835   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
836   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
837 
838   PetscFunctionReturn(0);
839 }
840 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
841 
842 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
843  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
844  * routine, but can also be used to convert an assembled SeqAIJ matrix
845  * into a SeqAIJMKL one. */
846 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
847 {
848   PetscErrorCode ierr;
849   Mat            B = *newmat;
850   Mat_SeqAIJMKL  *aijmkl;
851   PetscBool      set;
852   PetscBool      sametype;
853 
854   PetscFunctionBegin;
855   if (reuse == MAT_INITIAL_MATRIX) {
856     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
857   }
858 
859   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
860   if (sametype) PetscFunctionReturn(0);
861 
862   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
863   B->spptr = (void*) aijmkl;
864 
865   /* Set function pointers for methods that we inherit from AIJ but override.
866    * We also parse some command line options below, since those determine some of the methods we point to. */
867   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
868   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
869   B->ops->destroy          = MatDestroy_SeqAIJMKL;
870 
871   aijmkl->sparse_optimized = PETSC_FALSE;
872 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
873   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
874 #else
875   aijmkl->no_SpMV2 = PETSC_TRUE;
876 #endif
877   aijmkl->eager_inspection = PETSC_FALSE;
878 
879   /* Parse command line options. */
880   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
881   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
882   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
883   ierr = PetscOptionsEnd();CHKERRQ(ierr);
884 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
885   if(!aijmkl->no_SpMV2) {
886     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
887     aijmkl->no_SpMV2 = PETSC_TRUE;
888   }
889 #endif
890 
891 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
892   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
893   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
894   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
895   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
896 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
897   B->ops->matmultsymbolic  = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_SpMV2;
898   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
899 #   if !defined(PETSC_USE_COMPLEX)
900   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
901 #   endif
902 # endif
903 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
904 
905 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
906   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
907    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
908    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
909    * versions in which the older interface has not been deprecated, we use the old interface. */
910   if (aijmkl->no_SpMV2) {
911     B->ops->mult             = MatMult_SeqAIJMKL;
912     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
913     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
914     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
915   }
916 #endif
917 
918   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
919 
920   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
921   *newmat = B;
922   PetscFunctionReturn(0);
923 }
924 
925 /*@C
926    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
927    This type inherits from AIJ and is largely identical, but uses sparse BLAS
928    routines from Intel MKL whenever possible.
929    If the installed version of MKL supports the "SpMV2" sparse
930    inspector-executor routines, then those are used by default.
931    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
932    symmetric A) operations are currently supported.
933    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
934 
935    Collective
936 
937    Input Parameters:
938 +  comm - MPI communicator, set to PETSC_COMM_SELF
939 .  m - number of rows
940 .  n - number of columns
941 .  nz - number of nonzeros per row (same for all rows)
942 -  nnz - array containing the number of nonzeros in the various rows
943          (possibly different for each row) or NULL
944 
945    Output Parameter:
946 .  A - the matrix
947 
948    Options Database Keys:
949 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
950 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
951 
952    Notes:
953    If nnz is given then nz is ignored
954 
955    Level: intermediate
956 
957 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
958 @*/
959 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
960 {
961   PetscErrorCode ierr;
962 
963   PetscFunctionBegin;
964   ierr = MatCreate(comm,A);CHKERRQ(ierr);
965   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
966   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
967   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
972 {
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
977   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980