xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
/* Private data appended (via A->spptr) to a MATSEQAIJ matrix to make it a MATSEQAIJMKL. */
typedef struct {
  PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;     /* PETSc object state of the matrix when the MKL handle was last built; compared against the
                                  * current state in the Mult routines to detect a stale handle that must be rebuilt. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* MKL matrix descriptor (type/fill mode/diagonal) passed to the executor routines. */
#endif
} Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
52 
53   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
54 
55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
56   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57    * simply involves destroying the MKL sparse matrix handle and then freeing
58    * the spptr pointer. */
59   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
60 
61   if (aijmkl->sparse_optimized) {
62     sparse_status_t stat;
63     stat = mkl_sparse_destroy(aijmkl->csrA);
64     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
65   }
66 #endif
67   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
68 
69   /* Change the type of B to MATSEQAIJ. */
70   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
71 
72   *newmat = B;
73   PetscFunctionReturn(0);
74 }
75 
/* Destroys a MATSEQAIJMKL matrix: releases the MKL sparse handle (if one was
 * created), frees the Mat_SeqAIJMKL private data, then delegates the rest of
 * the teardown to MatDestroy_SeqAIJ(). */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    /* Only destroy the MKL handle if one was actually created (lazy inspection may never have built it). */
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
106 
107 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
108  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
109  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
110  * handle, creates a new one, and then calls mkl_sparse_optimize().
111  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
112  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
113  * an unoptimized matrix handle here. */
114 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
115 {
116 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
117   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
118    * does nothing. We make it callable anyway in this case because it cuts
119    * down on littering the code with #ifdefs. */
120   PetscFunctionBegin;
121   PetscFunctionReturn(0);
122 #else
123   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
124   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
125   PetscInt         m,n;
126   MatScalar        *aa;
127   PetscInt         *aj,*ai;
128   sparse_status_t  stat;
129   PetscErrorCode   ierr;
130 
131   PetscFunctionBegin;
132 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
133   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
134    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
135    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
136    * mkl_sparse_optimize() later. */
137   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
138 #endif
139 
140   if (aijmkl->sparse_optimized) {
141     /* Matrix has been previously assembled and optimized. Must destroy old
142      * matrix handle before running the optimization step again. */
143     stat = mkl_sparse_destroy(aijmkl->csrA);
144     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
145   }
146   aijmkl->sparse_optimized = PETSC_FALSE;
147 
148   /* Now perform the SpMV2 setup and matrix optimization. */
149   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
150   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
151   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
152   m = A->rmap->n;
153   n = A->cmap->n;
154   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
155   aa   = a->a;  /* Nonzero elements stored row-by-row. */
156   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
157   if ((a->nz!=0) & !(A->structure_only)) {
158     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
159      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
160     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
161     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
162     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
163     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
164     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
165     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
166     if (!aijmkl->no_SpMV2) {
167       stat = mkl_sparse_optimize(aijmkl->csrA);
168       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
169     }
170     aijmkl->sparse_optimized = PETSC_TRUE;
171     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
172   }
173 
174   PetscFunctionReturn(0);
175 #endif
176 }
177 
178 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
179  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
180  * matrix handle.
181  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
182  * there is no good alternative. */
183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
184 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
185 {
186   PetscErrorCode      ierr;
187   sparse_status_t     stat;
188   sparse_index_base_t indexing;
189   PetscInt            nrows, ncols;
190   PetscInt            *aj,*ai,*dummy;
191   MatScalar           *aa;
192   Mat                 A;
193   Mat_SeqAIJMKL       *aijmkl;
194 
195   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
196   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
197   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
198 
199   if (reuse == MAT_REUSE_MATRIX) {
200     ierr = MatDestroy(mat);CHKERRQ(ierr);
201   }
202   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
203   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
204   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
205   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
206    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
207    * they will be destroyed when the MKL handle is destroyed.
208    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
209   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
210 
211   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
212    * Now turn it into a MATSEQAIJMKL. */
213   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
214 
215   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
216   aijmkl->csrA = csrA;
217 
218   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
219    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
220    * and just need to be able to run the MKL optimization step. */
221   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
222   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
223   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
224   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
225   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
226   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
227   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
228   if (!aijmkl->no_SpMV2) {
229     stat = mkl_sparse_optimize(aijmkl->csrA);
230     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
231   }
232   aijmkl->sparse_optimized = PETSC_TRUE;
233   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
234 
235   *mat = A;
236   PetscFunctionReturn(0);
237 }
238 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
239 
240 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
241  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
242  * MatMatMultNumeric(). */
243 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
244 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
245 {
246   PetscInt            i;
247   PetscInt            nrows,ncols;
248   PetscInt            nz;
249   PetscInt            *ai,*aj,*dummy;
250   PetscScalar         *aa;
251   PetscErrorCode      ierr;
252   Mat_SeqAIJMKL       *aijmkl;
253   sparse_status_t     stat;
254   sparse_index_base_t indexing;
255 
256   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
257 
258   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
259   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
260   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
261 
262   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
263    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
264   for (i=0; i<nrows; i++) {
265     nz = ai[i+1] - ai[i];
266     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
267   }
268 
269   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
271 
272   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
273   /* We mark our matrix as having a valid, optimized MKL handle.
274    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
275   aijmkl->sparse_optimized = PETSC_TRUE;
276 
277   PetscFunctionReturn(0);
278 }
279 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
280 
281 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
282 {
283   PetscErrorCode ierr;
284   Mat_SeqAIJMKL  *aijmkl;
285   Mat_SeqAIJMKL  *aijmkl_dest;
286 
287   PetscFunctionBegin;
288   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
289   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
290   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
291   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
292   aijmkl_dest->sparse_optimized = PETSC_FALSE;
293   if (aijmkl->eager_inspection) {
294     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
295   }
296   PetscFunctionReturn(0);
297 }
298 
/* Completes assembly of a MATSEQAIJMKL matrix by delegating to the SeqAIJ
 * assembly routine (with inodes disabled) and, if eager inspection was
 * requested, building the MKL sparse matrix handle immediately. */
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
325 
326 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
327 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
328 {
329   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
330   const PetscScalar *x;
331   PetscScalar       *y;
332   const MatScalar   *aa;
333   PetscErrorCode    ierr;
334   PetscInt          m=A->rmap->n;
335   PetscInt          n=A->cmap->n;
336   PetscScalar       alpha = 1.0;
337   PetscScalar       beta = 0.0;
338   const PetscInt    *aj,*ai;
339   char              matdescra[6];
340 
341 
342   /* Variables not in MatMult_SeqAIJ. */
343   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
344 
345   PetscFunctionBegin;
346   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
347   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
348   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
350   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
351   aa   = a->a;  /* Nonzero elements stored row-by-row. */
352   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
353 
354   /* Call MKL sparse BLAS routine to do the MatMult. */
355   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
356 
357   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
359   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 #endif
363 
364 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
365 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
366 {
367   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
368   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
369   const PetscScalar *x;
370   PetscScalar       *y;
371   PetscErrorCode    ierr;
372   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
373   PetscObjectState  state;
374 
375   PetscFunctionBegin;
376 
377   /* If there are no nonzero entries, zero yy and return immediately. */
378   if(!a->nz) {
379     PetscInt i;
380     PetscInt m=A->rmap->n;
381     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
382     for (i=0; i<m; i++) {
383       y[i] = 0.0;
384     }
385     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
386     PetscFunctionReturn(0);
387   }
388 
389   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
390   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
391 
392   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
393    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
394    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
395   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
396   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
397     MatSeqAIJMKL_create_mkl_handle(A);
398   }
399 
400   /* Call MKL SpMV2 executor routine to do the MatMult. */
401   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
402   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
403 
404   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
405   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
406   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
410 
411 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
412 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
413 {
414   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
415   const PetscScalar *x;
416   PetscScalar       *y;
417   const MatScalar   *aa;
418   PetscErrorCode    ierr;
419   PetscInt          m=A->rmap->n;
420   PetscInt          n=A->cmap->n;
421   PetscScalar       alpha = 1.0;
422   PetscScalar       beta = 0.0;
423   const PetscInt    *aj,*ai;
424   char              matdescra[6];
425 
426   /* Variables not in MatMultTranspose_SeqAIJ. */
427   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
428 
429   PetscFunctionBegin;
430   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
431   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
432   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
434   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
435   aa   = a->a;  /* Nonzero elements stored row-by-row. */
436   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
437 
438   /* Call MKL sparse BLAS routine to do the MatMult. */
439   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
440 
441   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
442   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
443   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 #endif
447 
448 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
449 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
450 {
451   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
452   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
453   const PetscScalar *x;
454   PetscScalar       *y;
455   PetscErrorCode    ierr;
456   sparse_status_t   stat;
457   PetscObjectState  state;
458 
459   PetscFunctionBegin;
460 
461   /* If there are no nonzero entries, zero yy and return immediately. */
462   if(!a->nz) {
463     PetscInt i;
464     PetscInt n=A->cmap->n;
465     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
466     for (i=0; i<n; i++) {
467       y[i] = 0.0;
468     }
469     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
470     PetscFunctionReturn(0);
471   }
472 
473   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
474   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
475 
476   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
477    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
478    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
479   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
480   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
481     MatSeqAIJMKL_create_mkl_handle(A);
482   }
483 
484   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
485   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
486   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
487 
488   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
489   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
490   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
494 
495 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Computes zz = A*xx + yy using the old (non-inspector-executor) MKL sparse BLAS
 * routine mkl_xcsrmv(). Since MKL has no fused multiply-add, the yy != zz case is
 * handled as z = A*x followed by an explicit z += y loop. */
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;          /* Set below: 1.0 when zz aliases yy (accumulate), 0.0 otherwise (overwrite then add). */
  char              matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
544 #endif
545 
546 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
547 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
548 {
549   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
550   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
551   const PetscScalar *x;
552   PetscScalar       *y,*z;
553   PetscErrorCode    ierr;
554   PetscInt          m=A->rmap->n;
555   PetscInt          i;
556 
557   /* Variables not in MatMultAdd_SeqAIJ. */
558   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
559   PetscObjectState  state;
560 
561   PetscFunctionBegin;
562 
563   /* If there are no nonzero entries, set zz = yy and return immediately. */
564   if(!a->nz) {
565     PetscInt i;
566     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
567     for (i=0; i<m; i++) {
568       z[i] = y[i];
569     }
570     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
571     PetscFunctionReturn(0);
572   }
573 
574   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
575   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
576 
577   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
578    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
579    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
580   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
581   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
582     MatSeqAIJMKL_create_mkl_handle(A);
583   }
584 
585   /* Call MKL sparse BLAS routine to do the MatMult. */
586   if (zz == yy) {
587     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
588      * with alpha and beta both set to 1.0. */
589     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
590     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
591   } else {
592     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
593      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
594     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
595     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
596     for (i=0; i<m; i++) {
597       z[i] += y[i];
598     }
599   }
600 
601   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
602   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
603   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
604   PetscFunctionReturn(0);
605 }
606 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
607 
608 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* Computes zz = A^T*xx + yy using the old (non-inspector-executor) MKL sparse BLAS
 * routine mkl_xcsrmv(). Note that z and y have length n = A->cmap->n here, since
 * this is a transpose product. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;          /* Set below: 1.0 when zz aliases yy (accumulate), 0.0 otherwise (overwrite then add). */
  char              matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
657 #endif
658 
659 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
660 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
661 {
662   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
663   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
664   const PetscScalar *x;
665   PetscScalar       *y,*z;
666   PetscErrorCode    ierr;
667   PetscInt          n=A->cmap->n;
668   PetscInt          i;
669   PetscObjectState  state;
670 
671   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
672   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
673 
674   PetscFunctionBegin;
675 
676   /* If there are no nonzero entries, set zz = yy and return immediately. */
677   if(!a->nz) {
678     PetscInt i;
679     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
680     for (i=0; i<n; i++) {
681       z[i] = y[i];
682     }
683     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
684     PetscFunctionReturn(0);
685   }
686 
687   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
688   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
689 
690   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
691    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
692    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
693   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
694   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
695     MatSeqAIJMKL_create_mkl_handle(A);
696   }
697 
698   /* Call MKL sparse BLAS routine to do the MatMult. */
699   if (zz == yy) {
700     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
701      * with alpha and beta both set to 1.0. */
702     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
703     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
704   } else {
705     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
706      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
707     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
708     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
709     for (i=0; i<n; i++) {
710       z[i] += y[i];
711     }
712   }
713 
714   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
715   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
716   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
720 
721 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
722 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
723 {
724   Mat_SeqAIJMKL       *a, *b, *c;
725   sparse_matrix_t     csrA, csrB, csrC;
726   PetscErrorCode      ierr;
727   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
728   struct matrix_descr descr_type_gen;
729   PetscObjectState    state;
730 
731   PetscFunctionBegin;
732   a = (Mat_SeqAIJMKL*)A->spptr;
733   b = (Mat_SeqAIJMKL*)B->spptr;
734   c = (Mat_SeqAIJMKL*)C->spptr;
735   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
736   if (!a->sparse_optimized || a->state != state) {
737     MatSeqAIJMKL_create_mkl_handle(A);
738   }
739   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
740   if (!b->sparse_optimized || b->state != state) {
741     MatSeqAIJMKL_create_mkl_handle(B);
742   }
743   csrA = a->csrA;
744   csrB = b->csrA;
745   csrC = c->csrA;
746   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
747 
748   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
749                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
750                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
751 
752   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
753 
754   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
755   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
756 
757   PetscFunctionReturn(0);
758 }
759 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
760 
761 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
762 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
763 {
764   Mat_SeqAIJMKL       *a, *p, *c;
765   sparse_matrix_t     csrA, csrP, csrC;
766   PetscBool           set, flag;
767   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
768   struct matrix_descr descr_type_sym;
769   PetscObjectState    state;
770   PetscErrorCode      ierr;
771 
772   PetscFunctionBegin;
773   ierr = MatIsSymmetricKnown(A,&set,&flag);
774   if (!set || (set && !flag)) {
775     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
776     PetscFunctionReturn(0);
777   }
778 
779   a = (Mat_SeqAIJMKL*)A->spptr;
780   p = (Mat_SeqAIJMKL*)P->spptr;
781   c = (Mat_SeqAIJMKL*)C->spptr;
782   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
783   if (!a->sparse_optimized || a->state != state) {
784     MatSeqAIJMKL_create_mkl_handle(A);
785   }
786   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
787   if (!p->sparse_optimized || p->state != state) {
788     MatSeqAIJMKL_create_mkl_handle(P);
789   }
790   csrA = a->csrA;
791   csrP = p->csrA;
792   csrC = c->csrA;
793   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
794   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
795   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
796 
797   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
798   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
799   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
800 
801   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
802   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
803 
804   PetscFunctionReturn(0);
805 }
806 #endif
807 
808 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
809  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
810  * routine, but can also be used to convert an assembled SeqAIJ matrix
811  * into a SeqAIJMKL one. */
812 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
813 {
814   PetscErrorCode ierr;
815   Mat            B = *newmat;
816   Mat_SeqAIJMKL  *aijmkl;
817   PetscBool      set;
818   PetscBool      sametype;
819 
820   PetscFunctionBegin;
821   if (reuse == MAT_INITIAL_MATRIX) {
822     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
823   }
824 
825   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
826   if (sametype) PetscFunctionReturn(0);
827 
828   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
829   B->spptr = (void*) aijmkl;
830 
831   /* Set function pointers for methods that we inherit from AIJ but override.
832    * We also parse some command line options below, since those determine some of the methods we point to. */
833   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
834   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
835   B->ops->destroy          = MatDestroy_SeqAIJMKL;
836 
837   aijmkl->sparse_optimized = PETSC_FALSE;
838 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
839   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
840 #else
841   aijmkl->no_SpMV2 = PETSC_TRUE;
842 #endif
843   aijmkl->eager_inspection = PETSC_FALSE;
844 
845   /* Parse command line options. */
846   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
847   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
848   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
849   ierr = PetscOptionsEnd();CHKERRQ(ierr);
850 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
851   if(!aijmkl->no_SpMV2) {
852     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
853     aijmkl->no_SpMV2 = PETSC_TRUE;
854   }
855 #endif
856 
857 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
858   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
859   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
860   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
861   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
862 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
863   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
864 #   if !defined(PETSC_USE_COMPLEX)
865   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
866 #   endif
867 # endif
868 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
869 
870 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
871   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
872    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
873    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
874    * versions in which the older interface has not been deprecated, we use the old interface. */
875   if (aijmkl->no_SpMV2) {
876     B->ops->mult             = MatMult_SeqAIJMKL;
877     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
878     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
879     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
880   }
881 #endif
882 
883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
884 
885   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
886   *newmat = B;
887   PetscFunctionReturn(0);
888 }
889 
890 /*@C
891    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
892    This type inherits from AIJ and is largely identical, but uses sparse BLAS
893    routines from Intel MKL whenever possible.
894    If the installed version of MKL supports the "SpMV2" sparse
895    inspector-executor routines, then those are used by default.
896    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
897    symmetric A) operations are currently supported.
898    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
899 
900    Collective
901 
902    Input Parameters:
903 +  comm - MPI communicator, set to PETSC_COMM_SELF
904 .  m - number of rows
905 .  n - number of columns
906 .  nz - number of nonzeros per row (same for all rows)
907 -  nnz - array containing the number of nonzeros in the various rows
908          (possibly different for each row) or NULL
909 
910    Output Parameter:
911 .  A - the matrix
912 
913    Options Database Keys:
914 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
915 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
916 
917    Notes:
918    If nnz is given then nz is ignored
919 
920    Level: intermediate
921 
922 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
923 @*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create an empty m x n matrix, set its type to SEQAIJMKL (which converts it
   * from the base SEQAIJ type), then preallocate storage using either the
   * uniform per-row count nz or the per-row array nnz (nnz takes precedence). */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
935 
/* MatCreate_SeqAIJMKL - registered constructor for the MATSEQAIJMKL type.
 * Builds the matrix as a plain SEQAIJ and then converts it in place to the
 * SEQAIJMKL subclass via MatConvert_SeqAIJ_SeqAIJMKL(). */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
945