xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   if (!aijmkl->no_SpMV2) {
61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70 
71   if (aijmkl->sparse_optimized) {
72     sparse_status_t stat;
73     stat = mkl_sparse_destroy(aijmkl->csrA);
74     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
75   }
76 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
77   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
78 
79   /* Change the type of B to MATSEQAIJ. */
80   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
81 
82   *newmat = B;
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
87 {
88   PetscErrorCode ierr;
89   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
90 
91   PetscFunctionBegin;
92 
93   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
94   if (aijmkl) {
95     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
96 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
97     if (aijmkl->sparse_optimized) {
98       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
99       stat = mkl_sparse_destroy(aijmkl->csrA);
100       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
101     }
102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
103     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
104   }
105 
106   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
107    * to destroy everything that remains. */
108   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
109   /* Note that I don't call MatSetType().  I believe this is because that
110    * is only to be called when *building* a matrix.  I could be wrong, but
111    * that is how things work for the SuperLU matrix class. */
112   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
117  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
118  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
119  * handle, creates a new one, and then calls mkl_sparse_optimize().
120  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
121  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
122  * an unoptimized matrix handle here. */
123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
124 {
125 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
126   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
127    * does nothing. We make it callable anyway in this case because it cuts
128    * down on littering the code with #ifdefs. */
129   PetscFunctionBegin;
130   PetscFunctionReturn(0);
131 #else
132   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
133   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
134   PetscInt         m,n;
135   MatScalar        *aa;
136   PetscInt         *aj,*ai;
137   sparse_status_t  stat;
138   PetscErrorCode   ierr;
139 
140   PetscFunctionBegin;
141 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
142   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
143    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
144    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
145    * mkl_sparse_optimize() later. */
146   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
147 #endif
148 
149   if (aijmkl->sparse_optimized) {
150     /* Matrix has been previously assembled and optimized. Must destroy old
151      * matrix handle before running the optimization step again. */
152     stat = mkl_sparse_destroy(aijmkl->csrA);
153     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
154   }
155   aijmkl->sparse_optimized = PETSC_FALSE;
156 
157   /* Now perform the SpMV2 setup and matrix optimization. */
158   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
161   m = A->rmap->n;
162   n = A->cmap->n;
163   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
164   aa   = a->a;  /* Nonzero elements stored row-by-row. */
165   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
166   if (a->nz && aa && !A->structure_only) {
167     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
168      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
169     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
170     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
171     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
173     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
174     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
175     if (!aijmkl->no_SpMV2) {
176       stat = mkl_sparse_optimize(aijmkl->csrA);
177       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
178     }
179     aijmkl->sparse_optimized = PETSC_TRUE;
180     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
181   } else {
182     aijmkl->csrA = PETSC_NULL;
183   }
184 
185   PetscFunctionReturn(0);
186 #endif
187 }
188 
189 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
190 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
191 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
192 {
193   PetscErrorCode      ierr;
194   sparse_status_t     stat;
195   sparse_index_base_t indexing;
196   PetscInt            m,n;
197   PetscInt            *aj,*ai,*dummy;
198   MatScalar           *aa;
199   Mat_SeqAIJMKL       *aijmkl;
200 
201   if (csrA) {
202     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
203     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
204     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
205     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
206   } else {
207     aj = ai = PETSC_NULL;
208     aa = PETSC_NULL;
209   }
210 
211   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
212   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
213   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215    * they will be destroyed when the MKL handle is destroyed.
216    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217   if (csrA) {
218     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
219   } else {
220     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
221     ierr = MatSetUp(A);CHKERRQ(ierr);
222     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
223     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224   }
225 
226   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
227    * Now turn it into a MATSEQAIJMKL. */
228   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
229 
230   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
231   aijmkl->csrA = csrA;
232 
233   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
234    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
235    * and just need to be able to run the MKL optimization step. */
236   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
237   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
238   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
239   if (csrA) {
240     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
241     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
242     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
243     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
244   }
245   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
249 
250 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
251  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
252  * MatMatMultNumeric(). */
253 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
254 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
255 {
256   PetscInt            i;
257   PetscInt            nrows,ncols;
258   PetscInt            nz;
259   PetscInt            *ai,*aj,*dummy;
260   PetscScalar         *aa;
261   PetscErrorCode      ierr;
262   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
263   sparse_status_t     stat;
264   sparse_index_base_t indexing;
265 
266   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
267   if (!aijmkl->csrA) PetscFunctionReturn(0);
268 
269   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
270   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
271   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
272 
273   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
274    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
275   for (i=0; i<nrows; i++) {
276     nz = ai[i+1] - ai[i];
277     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
278   }
279 
280   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
281   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
282 
283   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
284   /* We mark our matrix as having a valid, optimized MKL handle.
285    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
286   aijmkl->sparse_optimized = PETSC_TRUE;
287   PetscFunctionReturn(0);
288 }
289 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
290 
291 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
292 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
293 {
294   PetscInt            i,j,k;
295   PetscInt            nrows,ncols;
296   PetscInt            nz;
297   PetscInt            *ai,*aj,*dummy;
298   PetscScalar         *aa;
299   PetscErrorCode      ierr;
300   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
301   sparse_status_t     stat;
302   sparse_index_base_t indexing;
303 
304   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
305 
306   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
307   if (!aijmkl->csrA) {
308     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
309     PetscFunctionReturn(0);
310   }
311 
312   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
313   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
314   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
315 
316   k = 0;
317   for (i=0; i<nrows; i++) {
318     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
319     nz = ai[i+1] - ai[i];
320     for (j=0; j<nz; j++) {
321       if (aa) {
322         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
323       } else {
324         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
325       }
326       k++;
327     }
328     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
329   }
330   PetscFunctionReturn(0);
331 }
332 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
333 
334 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
335 {
336   PetscErrorCode ierr;
337   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
338   Mat_SeqAIJMKL  *aijmkl_dest;
339 
340   PetscFunctionBegin;
341   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
342   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
343   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
344   aijmkl_dest->sparse_optimized = PETSC_FALSE;
345   if (aijmkl->eager_inspection) {
346     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
347   }
348   PetscFunctionReturn(0);
349 }
350 
351 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
352 {
353   PetscErrorCode  ierr;
354   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
355   Mat_SeqAIJMKL   *aijmkl;
356 
357   PetscFunctionBegin;
358   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
359 
360   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
361    * extra information and some different methods, call the AssemblyEnd
362    * routine for a MATSEQAIJ.
363    * I'm not sure if this is the best way to do this, but it avoids
364    * a lot of code duplication. */
365   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
366   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
367 
368   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
369    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
370   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
371   if (aijmkl->eager_inspection) {
372     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
373   }
374 
375   PetscFunctionReturn(0);
376 }
377 
378 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
379 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
380 {
381   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
382   const PetscScalar *x;
383   PetscScalar       *y;
384   const MatScalar   *aa;
385   PetscErrorCode    ierr;
386   PetscInt          m = A->rmap->n;
387   PetscInt          n = A->cmap->n;
388   PetscScalar       alpha = 1.0;
389   PetscScalar       beta = 0.0;
390   const PetscInt    *aj,*ai;
391   char              matdescra[6];
392 
393 
394   /* Variables not in MatMult_SeqAIJ. */
395   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
396 
397   PetscFunctionBegin;
398   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
399   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
400   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
401   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
402   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
403   aa   = a->a;  /* Nonzero elements stored row-by-row. */
404   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
405 
406   /* Call MKL sparse BLAS routine to do the MatMult. */
407   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
408 
409   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
410   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 #endif
415 
416 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
417 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
418 {
419   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
420   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
421   const PetscScalar *x;
422   PetscScalar       *y;
423   PetscErrorCode    ierr;
424   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
425   PetscObjectState  state;
426 
427   PetscFunctionBegin;
428 
429   /* If there are no nonzero entries, zero yy and return immediately. */
430   if(!a->nz) {
431     PetscInt i;
432     PetscInt m=A->rmap->n;
433     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
434     for (i=0; i<m; i++) {
435       y[i] = 0.0;
436     }
437     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
438     PetscFunctionReturn(0);
439   }
440 
441   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
442   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
443 
444   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
445    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
446    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
447   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
448   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
449     MatSeqAIJMKL_create_mkl_handle(A);
450   }
451 
452   /* Call MKL SpMV2 executor routine to do the MatMult. */
453   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
454   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
455 
456   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
458   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
462 
463 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
464 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
465 {
466   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
467   const PetscScalar *x;
468   PetscScalar       *y;
469   const MatScalar   *aa;
470   PetscErrorCode    ierr;
471   PetscInt          m = A->rmap->n;
472   PetscInt          n = A->cmap->n;
473   PetscScalar       alpha = 1.0;
474   PetscScalar       beta = 0.0;
475   const PetscInt    *aj,*ai;
476   char              matdescra[6];
477 
478   /* Variables not in MatMultTranspose_SeqAIJ. */
479   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
480 
481   PetscFunctionBegin;
482   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
483   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
484   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
485   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
486   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
487   aa   = a->a;  /* Nonzero elements stored row-by-row. */
488   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
489 
490   /* Call MKL sparse BLAS routine to do the MatMult. */
491   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
492 
493   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
494   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 #endif
499 
500 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
501 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
502 {
503   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
504   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
505   const PetscScalar *x;
506   PetscScalar       *y;
507   PetscErrorCode    ierr;
508   sparse_status_t   stat;
509   PetscObjectState  state;
510 
511   PetscFunctionBegin;
512 
513   /* If there are no nonzero entries, zero yy and return immediately. */
514   if(!a->nz) {
515     PetscInt i;
516     PetscInt n=A->cmap->n;
517     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
518     for (i=0; i<n; i++) {
519       y[i] = 0.0;
520     }
521     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
522     PetscFunctionReturn(0);
523   }
524 
525   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527 
528   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
529    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
530    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
531   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
532   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
533     MatSeqAIJMKL_create_mkl_handle(A);
534   }
535 
536   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
537   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
538   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
539 
540   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
541   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
542   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
546 
547 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
548 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
551   const PetscScalar *x;
552   PetscScalar       *y,*z;
553   const MatScalar   *aa;
554   PetscErrorCode    ierr;
555   PetscInt          m = A->rmap->n;
556   PetscInt          n = A->cmap->n;
557   const PetscInt    *aj,*ai;
558   PetscInt          i;
559 
560   /* Variables not in MatMultAdd_SeqAIJ. */
561   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
562   PetscScalar       alpha = 1.0;
563   PetscScalar       beta;
564   char              matdescra[6];
565 
566   PetscFunctionBegin;
567   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
568   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
569 
570   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
571   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
572   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
573   aa   = a->a;  /* Nonzero elements stored row-by-row. */
574   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
575 
576   /* Call MKL sparse BLAS routine to do the MatMult. */
577   if (zz == yy) {
578     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
579     beta = 1.0;
580     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
581   } else {
582     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
583      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
584     beta = 0.0;
585     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
586     for (i=0; i<m; i++) {
587       z[i] += y[i];
588     }
589   }
590 
591   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
592   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
593   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 #endif
597 
598 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
599 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
602   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
603   const PetscScalar *x;
604   PetscScalar       *y,*z;
605   PetscErrorCode    ierr;
606   PetscInt          m = A->rmap->n;
607   PetscInt          i;
608 
609   /* Variables not in MatMultAdd_SeqAIJ. */
610   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
611   PetscObjectState  state;
612 
613   PetscFunctionBegin;
614 
615   /* If there are no nonzero entries, set zz = yy and return immediately. */
616   if(!a->nz) {
617     PetscInt i;
618     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
619     for (i=0; i<m; i++) {
620       z[i] = y[i];
621     }
622     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
623     PetscFunctionReturn(0);
624   }
625 
626   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
627   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
628 
629   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
630    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
631    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
632   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
633   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
634     MatSeqAIJMKL_create_mkl_handle(A);
635   }
636 
637   /* Call MKL sparse BLAS routine to do the MatMult. */
638   if (zz == yy) {
639     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
640      * with alpha and beta both set to 1.0. */
641     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
642     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
643   } else {
644     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
645      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
646     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
647     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
648     for (i=0; i<m; i++) {
649       z[i] += y[i];
650     }
651   }
652 
653   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
654   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
655   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
659 
660 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664   const PetscScalar *x;
665   PetscScalar       *y,*z;
666   const MatScalar   *aa;
667   PetscErrorCode    ierr;
668   PetscInt          m = A->rmap->n;
669   PetscInt          n = A->cmap->n;
670   const PetscInt    *aj,*ai;
671   PetscInt          i;
672 
673   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
674   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
675   PetscScalar       alpha = 1.0;
676   PetscScalar       beta;
677   char              matdescra[6];
678 
679   PetscFunctionBegin;
680   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
681   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
682 
683   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
684   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
685   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
686   aa   = a->a;  /* Nonzero elements stored row-by-row. */
687   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
688 
689   /* Call MKL sparse BLAS routine to do the MatMult. */
690   if (zz == yy) {
691     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
692     beta = 1.0;
693     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
694   } else {
695     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
696      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
697     beta = 0.0;
698     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
699     for (i=0; i<n; i++) {
700       z[i] += y[i];
701     }
702   }
703 
704   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
705   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
706   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 #endif
710 
711 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
712 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
715   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
716   const PetscScalar *x;
717   PetscScalar       *y,*z;
718   PetscErrorCode    ierr;
719   PetscInt          n = A->cmap->n;
720   PetscInt          i;
721   PetscObjectState  state;
722 
723   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
724   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
725 
726   PetscFunctionBegin;
727 
728   /* If there are no nonzero entries, set zz = yy and return immediately. */
729   if(!a->nz) {
730     PetscInt i;
731     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
732     for (i=0; i<n; i++) {
733       z[i] = y[i];
734     }
735     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
736     PetscFunctionReturn(0);
737   }
738 
739   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
740   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
741 
742   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
743    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
744    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
745   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
746   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
747     MatSeqAIJMKL_create_mkl_handle(A);
748   }
749 
750   /* Call MKL sparse BLAS routine to do the MatMult. */
751   if (zz == yy) {
752     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
753      * with alpha and beta both set to 1.0. */
754     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
755     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
756   } else {
757     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
758      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
759     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
760     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
761     for (i=0; i<n; i++) {
762       z[i] += y[i];
763     }
764   }
765 
766   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
767   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
768   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
772 
773 /* -------------------------- MatProduct code -------------------------- */
774 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
775 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
776 {
777   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
778   sparse_matrix_t     csrA,csrB,csrC;
779   PetscInt            nrows,ncols;
780   PetscErrorCode      ierr;
781   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
782   struct matrix_descr descr_type_gen;
783   PetscObjectState    state;
784 
785   PetscFunctionBegin;
786   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
787    * not handle sparse matrices with zero rows or columns. */
788   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
789   else nrows = A->cmap->N;
790   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
791   else ncols = B->rmap->N;
792 
793   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
794   if (!a->sparse_optimized || a->state != state) {
795     MatSeqAIJMKL_create_mkl_handle(A);
796   }
797   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
798   if (!b->sparse_optimized || b->state != state) {
799     MatSeqAIJMKL_create_mkl_handle(B);
800   }
801   csrA = a->csrA;
802   csrB = b->csrA;
803   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
804 
805   if (csrA && csrB) {
806     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
807     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
808   } else {
809     csrC = PETSC_NULL;
810   }
811 
812   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
813 
814   PetscFunctionReturn(0);
815 }
816 
817 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
818 {
819   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
820   sparse_matrix_t     csrA, csrB, csrC;
821   PetscErrorCode      ierr;
822   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
823   struct matrix_descr descr_type_gen;
824   PetscObjectState    state;
825 
826   PetscFunctionBegin;
827   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
828   if (!a->sparse_optimized || a->state != state) {
829     MatSeqAIJMKL_create_mkl_handle(A);
830   }
831   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
832   if (!b->sparse_optimized || b->state != state) {
833     MatSeqAIJMKL_create_mkl_handle(B);
834   }
835   csrA = a->csrA;
836   csrB = b->csrA;
837   csrC = c->csrA;
838   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
839 
840   if (csrA && csrB) {
841     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
842     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
843   } else {
844     csrC = PETSC_NULL;
845   }
846 
847   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
848   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
849 
850   PetscFunctionReturn(0);
851 }
852 
853 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
863 {
864   PetscErrorCode ierr;
865 
866   PetscFunctionBegin;
867   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
872 {
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
881 {
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
890 {
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
899 {
900   PetscErrorCode ierr;
901 
902   PetscFunctionBegin;
903   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
908 {
909   PetscErrorCode ierr;
910   Mat_Product    *product = C->product;
911   Mat            A = product->A,B = product->B;
912 
913   PetscFunctionBegin;
914   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
919 {
920   PetscErrorCode ierr;
921   Mat_Product    *product = C->product;
922   Mat            A = product->A,B = product->B;
923   PetscReal      fill = product->fill;
924 
925   PetscFunctionBegin;
926   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
927   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
928   PetscFunctionReturn(0);
929 }
930 
931 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
932 {
933   Mat                 Ct;
934   Vec                 zeros;
935   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
936   sparse_matrix_t     csrA, csrP, csrC;
937   PetscBool           set, flag;
938   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
939   struct matrix_descr descr_type_sym;
940   PetscObjectState    state;
941   PetscErrorCode      ierr;
942 
943   PetscFunctionBegin;
944   ierr = MatIsSymmetricKnown(A,&set,&flag);
945   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
946 
947   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
948   if (!a->sparse_optimized || a->state != state) {
949     MatSeqAIJMKL_create_mkl_handle(A);
950   }
951   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
952   if (!p->sparse_optimized || p->state != state) {
953     MatSeqAIJMKL_create_mkl_handle(P);
954   }
955   csrA = a->csrA;
956   csrP = p->csrA;
957   csrC = c->csrA;
958   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
959   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
960   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
961 
962   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
963   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
964   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
965 
966   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
967    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
968    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
969    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
970    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
971    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
972    * the full matrix. */
973   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
974   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
975   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
976   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
977   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
978   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
979   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
980   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
981   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
982   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
983   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
984   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
985   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
990 {
991   Mat_Product         *product = C->product;
992   Mat                 A = product->A,P = product->B;
993   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
994   sparse_matrix_t     csrA,csrP,csrC;
995   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
996   struct matrix_descr descr_type_sym;
997   PetscObjectState    state;
998   PetscErrorCode      ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1002   if (!a->sparse_optimized || a->state != state) {
1003     MatSeqAIJMKL_create_mkl_handle(A);
1004   }
1005   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1006   if (!p->sparse_optimized || p->state != state) {
1007     MatSeqAIJMKL_create_mkl_handle(P);
1008   }
1009   csrA = a->csrA;
1010   csrP = p->csrA;
1011   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1012   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1013   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1014 
1015   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1016   if (csrP && csrA) {
1017     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1018     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1019   } else {
1020     csrC = PETSC_NULL;
1021   }
1022 
1023   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1024    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1025    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1026    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1027    * is guaranteed. */
1028   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1029 
1030   C->ops->productnumeric = MatProductNumeric_PtAP;
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1035 {
1036   PetscFunctionBegin;
1037   C->ops->productsymbolic = MatProductSymbolic_AB;
1038   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1043 {
1044   PetscFunctionBegin;
1045   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1050 {
1051   PetscFunctionBegin;
1052   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1053   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1058 {
1059   PetscErrorCode ierr;
1060   Mat_Product    *product = C->product;
1061   Mat            A = product->A;
1062   PetscBool      set, flag;
1063 
1064   PetscFunctionBegin;
1065 #if defined(PETSC_USE_COMPLEX)
1066   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1067    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1068    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1069    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1070   C->ops->productsymbolic = NULL;
1071 #else
1072   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1073   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1074   if (set && flag) {
1075     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1076     PetscFunctionReturn(0);
1077   } else {
1078     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1079   }
1080   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1081    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1082 #endif
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1087 {
1088   PetscFunctionBegin;
1089   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1094 {
1095   PetscFunctionBegin;
1096   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1101 {
1102   PetscErrorCode ierr;
1103   Mat_Product    *product = C->product;
1104 
1105   PetscFunctionBegin;
1106   switch (product->type) {
1107   case MATPRODUCT_AB:
1108     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1109     break;
1110   case MATPRODUCT_AtB:
1111     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1112     break;
1113   case MATPRODUCT_ABt:
1114     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1115     break;
1116   case MATPRODUCT_PtAP:
1117     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1118     break;
1119   case MATPRODUCT_RARt:
1120     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1121     break;
1122   case MATPRODUCT_ABC:
1123     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1124     break;
1125   default:
1126     break;
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1131 /* ------------------------ End MatProduct code ------------------------ */
1132 
1133 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1134  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1135  * routine, but can also be used to convert an assembled SeqAIJ matrix
1136  * into a SeqAIJMKL one. */
1137 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1138 {
1139   PetscErrorCode ierr;
1140   Mat            B = *newmat;
1141   Mat_SeqAIJMKL  *aijmkl;
1142   PetscBool      set;
1143   PetscBool      sametype;
1144 
1145   PetscFunctionBegin;
1146   if (reuse == MAT_INITIAL_MATRIX) {
1147     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1148   }
1149 
1150   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1151   if (sametype) PetscFunctionReturn(0);
1152 
1153   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1154   B->spptr = (void*) aijmkl;
1155 
1156   /* Set function pointers for methods that we inherit from AIJ but override.
1157    * We also parse some command line options below, since those determine some of the methods we point to. */
1158   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1159   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1160   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1161 
1162   aijmkl->sparse_optimized = PETSC_FALSE;
1163 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1164   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1165 #else
1166   aijmkl->no_SpMV2 = PETSC_TRUE;
1167 #endif
1168   aijmkl->eager_inspection = PETSC_FALSE;
1169 
1170   /* Parse command line options. */
1171   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1172   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1173   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1174   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1175 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1176   if(!aijmkl->no_SpMV2) {
1177     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1178     aijmkl->no_SpMV2 = PETSC_TRUE;
1179   }
1180 #endif
1181 
1182 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1183   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1184   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1185   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1186   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1187 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1188   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1189   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1190   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1191   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1192   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193 #   if !defined(PETSC_USE_COMPLEX)
1194   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1195 #   else
1196   B->ops->ptapnumeric             = NULL;
1197 #   endif
1198 # endif
1199 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1200 
1201 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1202   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1203    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1204    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1205    * versions in which the older interface has not been deprecated, we use the old interface. */
1206   if (aijmkl->no_SpMV2) {
1207     B->ops->mult             = MatMult_SeqAIJMKL;
1208     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1209     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1210     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1211   }
1212 #endif
1213 
1214   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1215 
1216   if(!aijmkl->no_SpMV2) {
1217 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1218 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1219     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1220 #endif
1221 #endif
1222   }
1223 
1224   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1225   *newmat = B;
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 /*@C
1230    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1231    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1232    routines from Intel MKL whenever possible.
1233    If the installed version of MKL supports the "SpMV2" sparse
1234    inspector-executor routines, then those are used by default.
1235    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1236    symmetric A) operations are currently supported.
1237    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1238 
1239    Collective
1240 
1241    Input Parameters:
1242 +  comm - MPI communicator, set to PETSC_COMM_SELF
1243 .  m - number of rows
1244 .  n - number of columns
1245 .  nz - number of nonzeros per row (same for all rows)
1246 -  nnz - array containing the number of nonzeros in the various rows
1247          (possibly different for each row) or NULL
1248 
1249    Output Parameter:
1250 .  A - the matrix
1251 
1252    Options Database Keys:
1253 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1254 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1255 
1256    Notes:
1257    If nnz is given then nz is ignored
1258 
1259    Level: intermediate
1260 
1261 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1262 @*/
1263 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1264 {
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1269   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1270   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1271   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1276 {
1277   PetscErrorCode ierr;
1278 
1279   PetscFunctionBegin;
1280   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1281   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284