xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision bcff154bd1ca9f71844efecffaaa876ba0de4061)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   if (!aijmkl->no_SpMV2) {
61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70 
71   if (aijmkl->sparse_optimized) {
72     sparse_status_t stat;
73     stat = mkl_sparse_destroy(aijmkl->csrA);
74     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
75   }
76 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
77   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
78 
79   /* Change the type of B to MATSEQAIJ. */
80   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
81 
82   *newmat = B;
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
87 {
88   PetscErrorCode ierr;
89   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
90 
91   PetscFunctionBegin;
92 
93   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
94   if (aijmkl) {
95     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
96 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
97     if (aijmkl->sparse_optimized) {
98       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
99       stat = mkl_sparse_destroy(aijmkl->csrA);
100       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
101     }
102 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
103     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
104   }
105 
106   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
107    * to destroy everything that remains. */
108   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
109   /* Note that I don't call MatSetType().  I believe this is because that
110    * is only to be called when *building* a matrix.  I could be wrong, but
111    * that is how things work for the SuperLU matrix class. */
112   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
117  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
118  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
119  * handle, creates a new one, and then calls mkl_sparse_optimize().
120  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
121  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
122  * an unoptimized matrix handle here. */
123 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
124 {
125 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
126   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
127    * does nothing. We make it callable anyway in this case because it cuts
128    * down on littering the code with #ifdefs. */
129   PetscFunctionBegin;
130   PetscFunctionReturn(0);
131 #else
132   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
133   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
134   PetscInt         m,n;
135   MatScalar        *aa;
136   PetscInt         *aj,*ai;
137   sparse_status_t  stat;
138   PetscErrorCode   ierr;
139 
140   PetscFunctionBegin;
141 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
142   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
143    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
144    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
145    * mkl_sparse_optimize() later. */
146   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
147 #endif
148 
149   if (aijmkl->sparse_optimized) {
150     /* Matrix has been previously assembled and optimized. Must destroy old
151      * matrix handle before running the optimization step again. */
152     stat = mkl_sparse_destroy(aijmkl->csrA);
153     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
154   }
155   aijmkl->sparse_optimized = PETSC_FALSE;
156 
157   /* Now perform the SpMV2 setup and matrix optimization. */
158   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
161   m = A->rmap->n;
162   n = A->cmap->n;
163   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
164   aa   = a->a;  /* Nonzero elements stored row-by-row. */
165   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
166   if (a->nz && aa && !A->structure_only) {
167     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
168      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
169     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
170     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
171     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
173     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
174     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
175     if (!aijmkl->no_SpMV2) {
176       stat = mkl_sparse_optimize(aijmkl->csrA);
177       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
178     }
179     aijmkl->sparse_optimized = PETSC_TRUE;
180     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
181   } else {
182     aijmkl->csrA = PETSC_NULL;
183   }
184 
185   PetscFunctionReturn(0);
186 #endif
187 }
188 
189 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
190 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
191 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
192 {
193   PetscErrorCode      ierr;
194   sparse_status_t     stat;
195   sparse_index_base_t indexing;
196   PetscInt            m,n;
197   PetscInt            *aj,*ai,*dummy;
198   MatScalar           *aa;
199   Mat_SeqAIJMKL       *aijmkl;
200 
201   if (csrA) {
202     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
203     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
204     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
205     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
206   } else {
207     aj = ai = PETSC_NULL;
208     aa = PETSC_NULL;
209   }
210 
211   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
212   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
213   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215    * they will be destroyed when the MKL handle is destroyed.
216    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217   if (csrA) {
218     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
219   } else {
220     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
221     ierr = MatSetUp(A);CHKERRQ(ierr);
222     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
223     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224   }
225 
226   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
227    * Now turn it into a MATSEQAIJMKL. */
228   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
229 
230   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
231   aijmkl->csrA = csrA;
232 
233   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
234    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
235    * and just need to be able to run the MKL optimization step. */
236   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
237   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
238   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
239   if (csrA) {
240     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
241     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
242     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
243     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
244   }
245   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
249 
250 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
251  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
252  * MatMatMultNumeric(). */
253 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
254 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
255 {
256   PetscInt            i;
257   PetscInt            nrows,ncols;
258   PetscInt            nz;
259   PetscInt            *ai,*aj,*dummy;
260   PetscScalar         *aa;
261   PetscErrorCode      ierr;
262   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
263   sparse_status_t     stat;
264   sparse_index_base_t indexing;
265 
266   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
267   if (!aijmkl->csrA) PetscFunctionReturn(0);
268 
269   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
270   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
271   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
272 
273   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
274    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
275   for (i=0; i<nrows; i++) {
276     nz = ai[i+1] - ai[i];
277     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
278   }
279 
280   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
281   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
282 
283   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
284   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
285    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
286    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
287   aijmkl->sparse_optimized = PETSC_FALSE;
288   PetscFunctionReturn(0);
289 }
290 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
291 
292 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
293 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
294 {
295   PetscInt            i,j,k;
296   PetscInt            nrows,ncols;
297   PetscInt            nz;
298   PetscInt            *ai,*aj,*dummy;
299   PetscScalar         *aa;
300   PetscErrorCode      ierr;
301   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
302   sparse_status_t     stat;
303   sparse_index_base_t indexing;
304 
305   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
306 
307   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
308   if (!aijmkl->csrA) {
309     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
310     PetscFunctionReturn(0);
311   }
312 
313   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
314   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
315   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
316 
317   k = 0;
318   for (i=0; i<nrows; i++) {
319     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
320     nz = ai[i+1] - ai[i];
321     for (j=0; j<nz; j++) {
322       if (aa) {
323         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr);
324       } else {
325         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
326       }
327       k++;
328     }
329     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
334 
335 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
336 {
337   PetscErrorCode ierr;
338   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
339   Mat_SeqAIJMKL  *aijmkl_dest;
340 
341   PetscFunctionBegin;
342   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
343   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
344   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
345   aijmkl_dest->sparse_optimized = PETSC_FALSE;
346   if (aijmkl->eager_inspection) {
347     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
353 {
354   PetscErrorCode  ierr;
355   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
356   Mat_SeqAIJMKL   *aijmkl;
357 
358   PetscFunctionBegin;
359   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
360 
361   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
362    * extra information and some different methods, call the AssemblyEnd
363    * routine for a MATSEQAIJ.
364    * I'm not sure if this is the best way to do this, but it avoids
365    * a lot of code duplication. */
366   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
367   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
368 
369   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
370    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
371   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
372   if (aijmkl->eager_inspection) {
373     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
374   }
375 
376   PetscFunctionReturn(0);
377 }
378 
379 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
380 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
381 {
382   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
383   const PetscScalar *x;
384   PetscScalar       *y;
385   const MatScalar   *aa;
386   PetscErrorCode    ierr;
387   PetscInt          m = A->rmap->n;
388   PetscInt          n = A->cmap->n;
389   PetscScalar       alpha = 1.0;
390   PetscScalar       beta = 0.0;
391   const PetscInt    *aj,*ai;
392   char              matdescra[6];
393 
394 
395   /* Variables not in MatMult_SeqAIJ. */
396   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
397 
398   PetscFunctionBegin;
399   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
400   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
401   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
402   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
403   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
404   aa   = a->a;  /* Nonzero elements stored row-by-row. */
405   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
406 
407   /* Call MKL sparse BLAS routine to do the MatMult. */
408   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
409 
410   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
411   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
412   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
413   PetscFunctionReturn(0);
414 }
415 #endif
416 
417 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
418 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
419 {
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
421   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
422   const PetscScalar *x;
423   PetscScalar       *y;
424   PetscErrorCode    ierr;
425   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
426   PetscObjectState  state;
427 
428   PetscFunctionBegin;
429 
430   /* If there are no nonzero entries, zero yy and return immediately. */
431   if (!a->nz) {
432     PetscInt i;
433     PetscInt m=A->rmap->n;
434     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
435     for (i=0; i<m; i++) {
436       y[i] = 0.0;
437     }
438     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
439     PetscFunctionReturn(0);
440   }
441 
442   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
443   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
444 
445   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
446    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
447    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
448   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
449   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
450     MatSeqAIJMKL_create_mkl_handle(A);
451   }
452 
453   /* Call MKL SpMV2 executor routine to do the MatMult. */
454   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
455   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
456 
457   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
458   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
459   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
463 
464 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
465 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
466 {
467   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
468   const PetscScalar *x;
469   PetscScalar       *y;
470   const MatScalar   *aa;
471   PetscErrorCode    ierr;
472   PetscInt          m = A->rmap->n;
473   PetscInt          n = A->cmap->n;
474   PetscScalar       alpha = 1.0;
475   PetscScalar       beta = 0.0;
476   const PetscInt    *aj,*ai;
477   char              matdescra[6];
478 
479   /* Variables not in MatMultTranspose_SeqAIJ. */
480   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
481 
482   PetscFunctionBegin;
483   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
484   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
485   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
486   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
487   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
488   aa   = a->a;  /* Nonzero elements stored row-by-row. */
489   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
490 
491   /* Call MKL sparse BLAS routine to do the MatMult. */
492   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
493 
494   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
495   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
496   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 #endif
500 
501 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
502 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
503 {
504   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
505   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
506   const PetscScalar *x;
507   PetscScalar       *y;
508   PetscErrorCode    ierr;
509   sparse_status_t   stat;
510   PetscObjectState  state;
511 
512   PetscFunctionBegin;
513 
514   /* If there are no nonzero entries, zero yy and return immediately. */
515   if (!a->nz) {
516     PetscInt i;
517     PetscInt n=A->cmap->n;
518     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
519     for (i=0; i<n; i++) {
520       y[i] = 0.0;
521     }
522     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
523     PetscFunctionReturn(0);
524   }
525 
526   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
527   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
528 
529   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
530    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
531    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
532   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
533   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
534     MatSeqAIJMKL_create_mkl_handle(A);
535   }
536 
537   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
538   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
539   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
540 
541   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
542   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
547 
548 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
549 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
550 {
551   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
552   const PetscScalar *x;
553   PetscScalar       *y,*z;
554   const MatScalar   *aa;
555   PetscErrorCode    ierr;
556   PetscInt          m = A->rmap->n;
557   PetscInt          n = A->cmap->n;
558   const PetscInt    *aj,*ai;
559   PetscInt          i;
560 
561   /* Variables not in MatMultAdd_SeqAIJ. */
562   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
563   PetscScalar       alpha = 1.0;
564   PetscScalar       beta;
565   char              matdescra[6];
566 
567   PetscFunctionBegin;
568   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
569   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
570 
571   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
572   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
573   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
574   aa   = a->a;  /* Nonzero elements stored row-by-row. */
575   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
576 
577   /* Call MKL sparse BLAS routine to do the MatMult. */
578   if (zz == yy) {
579     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
580     beta = 1.0;
581     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
582   } else {
583     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
584      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
585     beta = 0.0;
586     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
587     for (i=0; i<m; i++) {
588       z[i] += y[i];
589     }
590   }
591 
592   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
593   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
594   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 #endif
598 
599 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
600 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
603   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
604   const PetscScalar *x;
605   PetscScalar       *y,*z;
606   PetscErrorCode    ierr;
607   PetscInt          m = A->rmap->n;
608   PetscInt          i;
609 
610   /* Variables not in MatMultAdd_SeqAIJ. */
611   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
612   PetscObjectState  state;
613 
614   PetscFunctionBegin;
615 
616   /* If there are no nonzero entries, set zz = yy and return immediately. */
617   if (!a->nz) {
618     PetscInt i;
619     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
620     for (i=0; i<m; i++) {
621       z[i] = y[i];
622     }
623     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
624     PetscFunctionReturn(0);
625   }
626 
627   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
628   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
629 
630   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
631    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
632    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
633   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
634   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
635     MatSeqAIJMKL_create_mkl_handle(A);
636   }
637 
638   /* Call MKL sparse BLAS routine to do the MatMult. */
639   if (zz == yy) {
640     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
641      * with alpha and beta both set to 1.0. */
642     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
643     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
644   } else {
645     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
646      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
647     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
648     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
649     for (i=0; i<m; i++) {
650       z[i] += y[i];
651     }
652   }
653 
654   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
655   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
656   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
660 
661 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
662 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
663 {
664   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
665   const PetscScalar *x;
666   PetscScalar       *y,*z;
667   const MatScalar   *aa;
668   PetscErrorCode    ierr;
669   PetscInt          m = A->rmap->n;
670   PetscInt          n = A->cmap->n;
671   const PetscInt    *aj,*ai;
672   PetscInt          i;
673 
674   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
675   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
676   PetscScalar       alpha = 1.0;
677   PetscScalar       beta;
678   char              matdescra[6];
679 
680   PetscFunctionBegin;
681   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
682   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
683 
684   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
685   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
686   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
687   aa   = a->a;  /* Nonzero elements stored row-by-row. */
688   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
689 
690   /* Call MKL sparse BLAS routine to do the MatMult. */
691   if (zz == yy) {
692     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
693     beta = 1.0;
694     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
695   } else {
696     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
697      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
698     beta = 0.0;
699     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
700     for (i=0; i<n; i++) {
701       z[i] += y[i];
702     }
703   }
704 
705   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
706   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
707   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 #endif
711 
712 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
713 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
714 {
715   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
716   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
717   const PetscScalar *x;
718   PetscScalar       *y,*z;
719   PetscErrorCode    ierr;
720   PetscInt          n = A->cmap->n;
721   PetscInt          i;
722   PetscObjectState  state;
723 
724   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
725   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
726 
727   PetscFunctionBegin;
728 
729   /* If there are no nonzero entries, set zz = yy and return immediately. */
730   if (!a->nz) {
731     PetscInt i;
732     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
733     for (i=0; i<n; i++) {
734       z[i] = y[i];
735     }
736     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
737     PetscFunctionReturn(0);
738   }
739 
740   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
741   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
742 
743   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
744    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
745    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
746   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
747   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
748     MatSeqAIJMKL_create_mkl_handle(A);
749   }
750 
751   /* Call MKL sparse BLAS routine to do the MatMult. */
752   if (zz == yy) {
753     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
754      * with alpha and beta both set to 1.0. */
755     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
756     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
757   } else {
758     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
759      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
760     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
761     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
762     for (i=0; i<n; i++) {
763       z[i] += y[i];
764     }
765   }
766 
767   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
768   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
769   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
773 
774 /* -------------------------- MatProduct code -------------------------- */
775 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
776 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
777 {
778   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
779   sparse_matrix_t     csrA,csrB,csrC;
780   PetscInt            nrows,ncols;
781   PetscErrorCode      ierr;
782   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
783   struct matrix_descr descr_type_gen;
784   PetscObjectState    state;
785 
786   PetscFunctionBegin;
787   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
788    * not handle sparse matrices with zero rows or columns. */
789   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
790   else nrows = A->cmap->N;
791   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
792   else ncols = B->rmap->N;
793 
794   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
795   if (!a->sparse_optimized || a->state != state) {
796     MatSeqAIJMKL_create_mkl_handle(A);
797   }
798   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
799   if (!b->sparse_optimized || b->state != state) {
800     MatSeqAIJMKL_create_mkl_handle(B);
801   }
802   csrA = a->csrA;
803   csrB = b->csrA;
804   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
805 
806   if (csrA && csrB) {
807     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
808     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
809   } else {
810     csrC = PETSC_NULL;
811   }
812 
813   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
814 
815   PetscFunctionReturn(0);
816 }
817 
818 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
819 {
820   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
821   sparse_matrix_t     csrA, csrB, csrC;
822   PetscErrorCode      ierr;
823   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
824   struct matrix_descr descr_type_gen;
825   PetscObjectState    state;
826 
827   PetscFunctionBegin;
828   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
829   if (!a->sparse_optimized || a->state != state) {
830     MatSeqAIJMKL_create_mkl_handle(A);
831   }
832   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
833   if (!b->sparse_optimized || b->state != state) {
834     MatSeqAIJMKL_create_mkl_handle(B);
835   }
836   csrA = a->csrA;
837   csrB = b->csrA;
838   csrC = c->csrA;
839   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
840 
841   if (csrA && csrB) {
842     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
843     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
844   } else {
845     csrC = PETSC_NULL;
846   }
847 
848   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
849   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
850 
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
855 {
856   PetscErrorCode ierr;
857 
858   PetscFunctionBegin;
859   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
864 {
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
873 {
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
878   PetscFunctionReturn(0);
879 }
880 
881 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
882 {
883   PetscErrorCode ierr;
884 
885   PetscFunctionBegin;
886   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
891 {
892   PetscErrorCode ierr;
893 
894   PetscFunctionBegin;
895   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
900 {
901   PetscErrorCode ierr;
902 
903   PetscFunctionBegin;
904   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
909 {
910   PetscErrorCode ierr;
911   Mat_Product    *product = C->product;
912   Mat            A = product->A,B = product->B;
913 
914   PetscFunctionBegin;
915   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
920 {
921   PetscErrorCode ierr;
922   Mat_Product    *product = C->product;
923   Mat            A = product->A,B = product->B;
924   PetscReal      fill = product->fill;
925 
926   PetscFunctionBegin;
927   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
928   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
929   PetscFunctionReturn(0);
930 }
931 
932 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
933 {
934   Mat                 Ct;
935   Vec                 zeros;
936   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
937   sparse_matrix_t     csrA, csrP, csrC;
938   PetscBool           set, flag;
939   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
940   struct matrix_descr descr_type_sym;
941   PetscObjectState    state;
942   PetscErrorCode      ierr;
943 
944   PetscFunctionBegin;
945   ierr = MatIsSymmetricKnown(A,&set,&flag);
946   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
947 
948   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
949   if (!a->sparse_optimized || a->state != state) {
950     MatSeqAIJMKL_create_mkl_handle(A);
951   }
952   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
953   if (!p->sparse_optimized || p->state != state) {
954     MatSeqAIJMKL_create_mkl_handle(P);
955   }
956   csrA = a->csrA;
957   csrP = p->csrA;
958   csrC = c->csrA;
959   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
960   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
961   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
962 
963   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
964   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
965   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
966 
967   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
968    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
969    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
970    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
971    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
972    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
973    * the full matrix. */
974   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
975   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
976   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
977   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
978   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
979   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
980   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
981   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
982   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
983   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
984   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
985   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
986   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
991 {
992   Mat_Product         *product = C->product;
993   Mat                 A = product->A,P = product->B;
994   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
995   sparse_matrix_t     csrA,csrP,csrC;
996   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
997   struct matrix_descr descr_type_sym;
998   PetscObjectState    state;
999   PetscErrorCode      ierr;
1000 
1001   PetscFunctionBegin;
1002   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1003   if (!a->sparse_optimized || a->state != state) {
1004     MatSeqAIJMKL_create_mkl_handle(A);
1005   }
1006   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1007   if (!p->sparse_optimized || p->state != state) {
1008     MatSeqAIJMKL_create_mkl_handle(P);
1009   }
1010   csrA = a->csrA;
1011   csrP = p->csrA;
1012   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1013   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1014   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1015 
1016   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1017   if (csrP && csrA) {
1018     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1019     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1020   } else {
1021     csrC = PETSC_NULL;
1022   }
1023 
1024   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1025    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1026    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1027    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1028    * is guaranteed. */
1029   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1030 
1031   C->ops->productnumeric = MatProductNumeric_PtAP;
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1036 {
1037   PetscFunctionBegin;
1038   C->ops->productsymbolic = MatProductSymbolic_AB;
1039   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1044 {
1045   PetscFunctionBegin;
1046   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1051 {
1052   PetscFunctionBegin;
1053   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1054   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1059 {
1060   PetscErrorCode ierr;
1061   Mat_Product    *product = C->product;
1062   Mat            A = product->A;
1063   PetscBool      set, flag;
1064 
1065   PetscFunctionBegin;
1066 #if defined(PETSC_USE_COMPLEX)
1067   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1068    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1069    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1070    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1071   C->ops->productsymbolic = NULL;
1072 #else
1073   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1074   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1075   if (set && flag) {
1076     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1077     PetscFunctionReturn(0);
1078   } else {
1079     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1080   }
1081   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1082    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1083 #endif
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1088 {
1089   PetscFunctionBegin;
1090   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1095 {
1096   PetscFunctionBegin;
1097   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1102 {
1103   PetscErrorCode ierr;
1104   Mat_Product    *product = C->product;
1105 
1106   PetscFunctionBegin;
1107   switch (product->type) {
1108   case MATPRODUCT_AB:
1109     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1110     break;
1111   case MATPRODUCT_AtB:
1112     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1113     break;
1114   case MATPRODUCT_ABt:
1115     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1116     break;
1117   case MATPRODUCT_PtAP:
1118     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1119     break;
1120   case MATPRODUCT_RARt:
1121     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1122     break;
1123   case MATPRODUCT_ABC:
1124     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1125     break;
1126   default:
1127     break;
1128   }
1129   PetscFunctionReturn(0);
1130 }
1131 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1132 /* ------------------------ End MatProduct code ------------------------ */
1133 
1134 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1135  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1136  * routine, but can also be used to convert an assembled SeqAIJ matrix
1137  * into a SeqAIJMKL one. */
1138 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1139 {
1140   PetscErrorCode ierr;
1141   Mat            B = *newmat;
1142   Mat_SeqAIJMKL  *aijmkl;
1143   PetscBool      set;
1144   PetscBool      sametype;
1145 
1146   PetscFunctionBegin;
1147   if (reuse == MAT_INITIAL_MATRIX) {
1148     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1149   }
1150 
1151   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1152   if (sametype) PetscFunctionReturn(0);
1153 
1154   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1155   B->spptr = (void*) aijmkl;
1156 
1157   /* Set function pointers for methods that we inherit from AIJ but override.
1158    * We also parse some command line options below, since those determine some of the methods we point to. */
1159   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1160   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1161   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1162 
1163   aijmkl->sparse_optimized = PETSC_FALSE;
1164 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1165   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1166 #else
1167   aijmkl->no_SpMV2 = PETSC_TRUE;
1168 #endif
1169   aijmkl->eager_inspection = PETSC_FALSE;
1170 
1171   /* Parse command line options. */
1172   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1173   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1174   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1175   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1176 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1177   if (!aijmkl->no_SpMV2) {
1178     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1179     aijmkl->no_SpMV2 = PETSC_TRUE;
1180   }
1181 #endif
1182 
1183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1184   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1185   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1186   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1187   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1188 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1189   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1190   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1191   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1192   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1193   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1194 #   if !defined(PETSC_USE_COMPLEX)
1195   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1196 #   else
1197   B->ops->ptapnumeric             = NULL;
1198 #   endif
1199 # endif
1200 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1201 
1202 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1203   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1204    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1205    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1206    * versions in which the older interface has not been deprecated, we use the old interface. */
1207   if (aijmkl->no_SpMV2) {
1208     B->ops->mult             = MatMult_SeqAIJMKL;
1209     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1210     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1211     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1212   }
1213 #endif
1214 
1215   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1216 
1217   if (!aijmkl->no_SpMV2) {
1218 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1220     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1221 #endif
1222 #endif
1223   }
1224 
1225   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1226   *newmat = B;
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 /*@C
1231    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1232    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1233    routines from Intel MKL whenever possible.
1234    If the installed version of MKL supports the "SpMV2" sparse
1235    inspector-executor routines, then those are used by default.
1236    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1237    symmetric A) operations are currently supported.
1238    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1239 
1240    Collective
1241 
1242    Input Parameters:
1243 +  comm - MPI communicator, set to PETSC_COMM_SELF
1244 .  m - number of rows
1245 .  n - number of columns
1246 .  nz - number of nonzeros per row (same for all rows)
1247 -  nnz - array containing the number of nonzeros in the various rows
1248          (possibly different for each row) or NULL
1249 
1250    Output Parameter:
1251 .  A - the matrix
1252 
1253    Options Database Keys:
1254 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1255 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1256 
1257    Notes:
1258    If nnz is given then nz is ignored
1259 
1260    Level: intermediate
1261 
1262 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1263 @*/
1264 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1265 {
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1270   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1271   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1272   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1277 {
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1282   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285