xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision cbd2c53bc8d4e32f1aab714f173fd0b4c821fa72)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
/* Private context appended to a MATSEQAIJ matrix (via Mat->spptr) to make it a
 * MATSEQAIJMKL. Holds the user-selectable options plus the MKL inspector-executor
 * matrix handle and its descriptor (when the MKL library provides them). */
typedef struct {
  PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;     /* PetscObjectState of the Mat when the handle was built; used to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix descriptor (type/fill-mode/diag) passed alongside the handle to MKL executor calls. */
#endif
} Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61    * simply involves destroying the MKL sparse matrix handle and then freeing
62    * the spptr pointer. */
63   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
64 
65   if (aijmkl->sparse_optimized) {
66     sparse_status_t stat;
67     stat = mkl_sparse_destroy(aijmkl->csrA);
68     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
69   }
70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
71   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
72 
73   /* Change the type of B to MATSEQAIJ. */
74   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 
/* Destroys a MATSEQAIJMKL matrix: releases the MKL sparse matrix handle (if one
 * was created), frees the MKL-specific context hanging off A->spptr, and then
 * delegates to the base MATSEQAIJ destroy routine for everything else. */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    /* Only destroy the handle if one was actually created (sparse_optimized tracks this). */
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
109 
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
#endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  /* Descriptor is always "general"; the mode/diag fields must still be filled in for the MKL interface. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
    if (!aijmkl->no_SpMV2) {
      stat = mkl_sparse_optimize(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
    }
    aijmkl->sparse_optimized = PETSC_TRUE;
    /* Record the object state at handle creation so the MatMult-type routines can detect a stale handle. */
    ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  } else {
    /* Empty (or structure-only) matrix: leave the handle NULL; callers test csrA before using it. */
    aijmkl->csrA = PETSC_NULL;
  }

  PetscFunctionReturn(0);
#endif
}
182 
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
/* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle.
 * The CSR arrays exported from 'csrA' are copied into A (as a MATSEQAIJ), which is then converted in-place to
 * MATSEQAIJMKL and made to adopt 'csrA' as its MKL handle. Note that mkl_sparse_optimize() is NOT run on the
 * adopted handle here; aijmkl->sparse_optimized retains its default so optimization happens lazily later.
 * A NULL 'csrA' is accepted and yields an empty (assembled) matrix. 'comm' is currently unused. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            m,n;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat_SeqAIJMKL       *aijmkl;

  PetscFunctionBegin;  /* Was missing; required to pair with PetscFunctionReturn() for PETSc stack tracing. */
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
    if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = PETSC_NULL;
    aa = PETSC_NULL;
  }

  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    ierr = MatSetUp(A);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;  /* Adopt the caller's handle; it will be destroyed with A. */

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
  }
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
243 
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscFunctionBegin;  /* Was missing; required to pair with PetscFunctionReturn() for PETSc stack tracing. */

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(0);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Refresh the recorded object state so MatMult-type routines don't immediately rebuild the handle. */
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
285 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Prints the contents of the MKL sparse matrix handle associated with a MATSEQAIJMKL matrix.
 * A debugging aid for verifying that the handle agrees with the PETSc AIJ representation. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
{
  PetscInt            i,j,k;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  PetscFunctionBegin;  /* Was missing; required to pair with PetscFunctionReturn() for PETSc stack tracing. */
  ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* Walk the exported CSR arrays, printing (column, value) pairs row by row. */
  k = 0;
  for (i=0; i<nrows; i++) {
    ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
    nz = ai[i+1] - ai[i];
    for (j=0; j<nz; j++) {
      if (aa) {
        ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
      }
      k++;
    }
    ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
328 
329 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
330 {
331   PetscErrorCode ierr;
332   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
333   Mat_SeqAIJMKL  *aijmkl_dest;
334 
335   PetscFunctionBegin;
336   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
337   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
338   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
339   aijmkl_dest->sparse_optimized = PETSC_FALSE;
340   if (aijmkl->eager_inspection) {
341     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
347 {
348   PetscErrorCode  ierr;
349   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
350   Mat_SeqAIJMKL   *aijmkl;
351 
352   PetscFunctionBegin;
353   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
354 
355   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
356    * extra information and some different methods, call the AssemblyEnd
357    * routine for a MATSEQAIJ.
358    * I'm not sure if this is the best way to do this, but it avoids
359    * a lot of code duplication. */
360   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
361   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
362 
363   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
364    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
365   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
366   if (aijmkl->eager_inspection) {
367     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
368   }
369 
370   PetscFunctionReturn(0);
371 }
372 
373 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
374 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
375 {
376   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
377   const PetscScalar *x;
378   PetscScalar       *y;
379   const MatScalar   *aa;
380   PetscErrorCode    ierr;
381   PetscInt          m = A->rmap->n;
382   PetscInt          n = A->cmap->n;
383   PetscScalar       alpha = 1.0;
384   PetscScalar       beta = 0.0;
385   const PetscInt    *aj,*ai;
386   char              matdescra[6];
387 
388   /* Variables not in MatMult_SeqAIJ. */
389   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
390 
391   PetscFunctionBegin;
392   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
393   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
394   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
395   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
396   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
397   aa   = a->a;  /* Nonzero elements stored row-by-row. */
398   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
399 
400   /* Call MKL sparse BLAS routine to do the MatMult. */
401   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
402 
403   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
404   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 #endif
409 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes yy = A*xx using the MKL SpMV2 (inspector-executor) interface,
 * (re)building the MKL matrix handle first if it is missing or stale. */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately.
   * (The MKL handle is NULL in this case; see MatSeqAIJMKL_create_mkl_handle().) */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Check the return code; previously this call's result was silently discarded. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
456 
457 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
458 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
459 {
460   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
461   const PetscScalar *x;
462   PetscScalar       *y;
463   const MatScalar   *aa;
464   PetscErrorCode    ierr;
465   PetscInt          m = A->rmap->n;
466   PetscInt          n = A->cmap->n;
467   PetscScalar       alpha = 1.0;
468   PetscScalar       beta = 0.0;
469   const PetscInt    *aj,*ai;
470   char              matdescra[6];
471 
472   /* Variables not in MatMultTranspose_SeqAIJ. */
473   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
474 
475   PetscFunctionBegin;
476   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
477   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
478   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
479   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
481   aa   = a->a;  /* Nonzero elements stored row-by-row. */
482   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
483 
484   /* Call MKL sparse BLAS routine to do the MatMult. */
485   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
486 
487   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
488   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
489   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 #endif
493 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes yy = A^T*xx using the MKL SpMV2 (inspector-executor) interface,
 * (re)building the MKL matrix handle first if it is missing or stale. */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately.
   * (Note the output length is ncols = A->cmap->n for the transpose product.) */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Check the return code; previously this call's result was silently discarded. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
540 
541 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
542 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
543 {
544   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
545   const PetscScalar *x;
546   PetscScalar       *y,*z;
547   const MatScalar   *aa;
548   PetscErrorCode    ierr;
549   PetscInt          m = A->rmap->n;
550   PetscInt          n = A->cmap->n;
551   const PetscInt    *aj,*ai;
552   PetscInt          i;
553 
554   /* Variables not in MatMultAdd_SeqAIJ. */
555   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
556   PetscScalar       alpha = 1.0;
557   PetscScalar       beta;
558   char              matdescra[6];
559 
560   PetscFunctionBegin;
561   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
562   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
563 
564   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
565   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
566   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
567   aa   = a->a;  /* Nonzero elements stored row-by-row. */
568   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
569 
570   /* Call MKL sparse BLAS routine to do the MatMult. */
571   if (zz == yy) {
572     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
573     beta = 1.0;
574     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
575   } else {
576     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
577      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
578     beta = 0.0;
579     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
580     for (i=0; i<m; i++) {
581       z[i] += y[i];
582     }
583   }
584 
585   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
586   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
587   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 #endif
591 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes zz = A*xx + yy using the MKL SpMV2 (inspector-executor) interface,
 * (re)building the MKL matrix handle first if it is missing or stale. */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately.
   * (Uses the outer loop index; the previous shadowing declaration of 'i' is removed.) */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Check the return code; previously this call's result was silently discarded. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
653 
654 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMultTransposeAdd_SeqAIJMKL - computes zz = A^T*xx + yy using the older
 * (pre-inspector-executor) MKL sparse BLAS routine mkl_xcsrmv().
 * Only compiled when the old SpBLAS interface has not been deprecated. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;  /* Number of rows of A; MKL needs both dimensions. */
  PetscInt          n = A->cmap->n;  /* Number of columns of A; this is the length of the result A^T*x. */
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  /* NOTE(review): the remaining matdescra entries are left uninitialized here;
   * they appear to be ignored by MKL for the general ('g') matrix type -- confirm
   * against the mkl_?csrmv documentation. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {  /* Loop bound is n: the transpose product has one entry per column of A. */
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);  /* One multiply and one add per stored nonzero. */
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
703 #endif
704 
705 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
706 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
707 {
708   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
709   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
710   const PetscScalar *x;
711   PetscScalar       *y,*z;
712   PetscErrorCode    ierr;
713   PetscInt          n = A->cmap->n;
714   PetscInt          i;
715   PetscObjectState  state;
716 
717   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
718   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
719 
720   PetscFunctionBegin;
721 
722   /* If there are no nonzero entries, set zz = yy and return immediately. */
723   if (!a->nz) {
724     PetscInt i;
725     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
726     for (i=0; i<n; i++) {
727       z[i] = y[i];
728     }
729     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
730     PetscFunctionReturn(0);
731   }
732 
733   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
734   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
735 
736   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
737    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
738    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
739   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
740   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
741     MatSeqAIJMKL_create_mkl_handle(A);
742   }
743 
744   /* Call MKL sparse BLAS routine to do the MatMult. */
745   if (zz == yy) {
746     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
747      * with alpha and beta both set to 1.0. */
748     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
749     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
750   } else {
751     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
752      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
753     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
754     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
755     for (i=0; i<n; i++) {
756       z[i] += y[i];
757     }
758   }
759 
760   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
761   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
762   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
766 
767 /* -------------------------- MatProduct code -------------------------- */
768 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
769 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
770 {
771   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
772   sparse_matrix_t     csrA,csrB,csrC;
773   PetscInt            nrows,ncols;
774   PetscErrorCode      ierr;
775   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
776   struct matrix_descr descr_type_gen;
777   PetscObjectState    state;
778 
779   PetscFunctionBegin;
780   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
781    * not handle sparse matrices with zero rows or columns. */
782   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
783   else nrows = A->cmap->N;
784   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
785   else ncols = B->rmap->N;
786 
787   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
788   if (!a->sparse_optimized || a->state != state) {
789     MatSeqAIJMKL_create_mkl_handle(A);
790   }
791   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
792   if (!b->sparse_optimized || b->state != state) {
793     MatSeqAIJMKL_create_mkl_handle(B);
794   }
795   csrA = a->csrA;
796   csrB = b->csrA;
797   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
798 
799   if (csrA && csrB) {
800     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
801     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
802   } else {
803     csrC = PETSC_NULL;
804   }
805 
806   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
807 
808   PetscFunctionReturn(0);
809 }
810 
811 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
812 {
813   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
814   sparse_matrix_t     csrA, csrB, csrC;
815   PetscErrorCode      ierr;
816   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
817   struct matrix_descr descr_type_gen;
818   PetscObjectState    state;
819 
820   PetscFunctionBegin;
821   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
822   if (!a->sparse_optimized || a->state != state) {
823     MatSeqAIJMKL_create_mkl_handle(A);
824   }
825   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
826   if (!b->sparse_optimized || b->state != state) {
827     MatSeqAIJMKL_create_mkl_handle(B);
828   }
829   csrA = a->csrA;
830   csrB = b->csrA;
831   csrC = c->csrA;
832   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
833 
834   if (csrA && csrB) {
835     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
836     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
837   } else {
838     csrC = PETSC_NULL;
839   }
840 
841   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
842   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
843 
844   PetscFunctionReturn(0);
845 }
846 
/* Symbolic stage of C = A*B (neither operand transposed); thin wrapper over the shared private routine. */
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* 'fill' is not used: MKL determines the nonzero structure of C itself. */
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
855 
/* Numeric stage of C = A*B (neither operand transposed); thin wrapper over the shared private routine. */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
864 
/* Numeric stage of C = A^T*B; thin wrapper over the shared private routine with A transposed. */
PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
873 
/* Symbolic stage of C = A^T*B; thin wrapper over the shared private routine with A transposed. */
PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* 'fill' is not used: MKL determines the nonzero structure of C itself. */
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
882 
/* Symbolic stage of C = A*B^T; thin wrapper over the shared private routine with B transposed. */
PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* 'fill' is not used: MKL determines the nonzero structure of C itself. */
  ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
891 
/* Numeric stage of C = A*B^T; thin wrapper over the shared private routine with B transposed. */
PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
900 
/* MatProduct numeric driver for the AtB product type: unpacks A and B from the
 * product description attached to C and dispatches to the MKL A^T*B routine. */
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
911 
/* MatProduct symbolic driver for the AtB product type: runs the MKL A^T*B
 * symbolic stage, then installs the matching numeric routine on C. */
static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  Mat            A = product->A,B = product->B;
  PetscReal      fill = product->fill;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
  /* The numeric stage must use the same (MKL AtB) algorithm as the symbolic stage. */
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
924 
925 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
926 {
927   Mat                 Ct;
928   Vec                 zeros;
929   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
930   sparse_matrix_t     csrA, csrP, csrC;
931   PetscBool           set, flag;
932   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
933   struct matrix_descr descr_type_sym;
934   PetscObjectState    state;
935   PetscErrorCode      ierr;
936 
937   PetscFunctionBegin;
938   ierr = MatIsSymmetricKnown(A,&set,&flag);
939   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
940 
941   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
942   if (!a->sparse_optimized || a->state != state) {
943     MatSeqAIJMKL_create_mkl_handle(A);
944   }
945   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
946   if (!p->sparse_optimized || p->state != state) {
947     MatSeqAIJMKL_create_mkl_handle(P);
948   }
949   csrA = a->csrA;
950   csrP = p->csrA;
951   csrC = c->csrA;
952   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
953   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
954   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
955 
956   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
957   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
958   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
959 
960   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
961    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
962    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
963    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
964    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
965    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
966    * the full matrix. */
967   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
968   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
969   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
970   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
971   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
972   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
973   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
974   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
975   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
976   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
977   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
978   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
979   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
984 {
985   Mat_Product         *product = C->product;
986   Mat                 A = product->A,P = product->B;
987   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
988   sparse_matrix_t     csrA,csrP,csrC;
989   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
990   struct matrix_descr descr_type_sym;
991   PetscObjectState    state;
992   PetscErrorCode      ierr;
993 
994   PetscFunctionBegin;
995   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
996   if (!a->sparse_optimized || a->state != state) {
997     MatSeqAIJMKL_create_mkl_handle(A);
998   }
999   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1000   if (!p->sparse_optimized || p->state != state) {
1001     MatSeqAIJMKL_create_mkl_handle(P);
1002   }
1003   csrA = a->csrA;
1004   csrP = p->csrA;
1005   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1006   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1007   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1008 
1009   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1010   if (csrP && csrA) {
1011     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1012     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1013   } else {
1014     csrC = PETSC_NULL;
1015   }
1016 
1017   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1018    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1019    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1020    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1021    * is guaranteed. */
1022   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1023 
1024   C->ops->productnumeric = MatProductNumeric_PtAP;
1025   PetscFunctionReturn(0);
1026 }
1027 
/* Configure the MatProduct operations on C for the AB (C = A*B) product type:
 * use the generic symbolic driver with the MKL-backed symbolic kernel. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
1035 
/* Configure the MatProduct operations on C for the AtB (C = A^T*B) product type. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(0);
}
1042 
/* Configure the MatProduct operations on C for the ABt (C = A*B^T) product type.
 * Note that the symbolic stage uses the plain SeqAIJ implementation here; only
 * the numeric stage is MKL-backed (set via B->ops in MatConvert). */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
1050 
/* Configure the MatProduct operations on C for the PtAP (C = P^T*A*P) product type.
 * The MKL-optimized path (mkl_sparse_sypr) is only valid for real, known-symmetric A;
 * all other cases fall back to MatProductSymbolic_Unsafe() by leaving the pointer NULL. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  Mat            A = product->A;
  PetscBool      set, flag;

  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
     * We do this in several other locations in this file. This works for the time being, but these
     * routines are considered unsafe and may be removed from the MatProduct code in the future.
     * TODO: Add proper MATSEQAIJMKL implementations */
    C->ops->productsymbolic = NULL;
  } else {
    /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
    ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
    if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
    /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
     * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
  }
  PetscFunctionReturn(0);
}
1075 
/* RARt product type: no MKL-optimized implementation; fall back to the unsafe generic path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(0);
}
1082 
/* ABC product type: no MKL-optimized implementation; fall back to the unsafe generic path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(0);
}
1089 
/* MatProductSetFromOptions_SeqAIJMKL - dispatch to the per-product-type
 * configuration routine based on the product type recorded on C.
 * Unknown product types are left untouched (the default case is a no-op). */
PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_PtAP:
    ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_RARt:
    ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABC:
    ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
1120 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1121 /* ------------------------ End MatProduct code ------------------------ */
1122 
1123 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1124  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1125  * routine, but can also be used to convert an assembled SeqAIJ matrix
1126  * into a SeqAIJMKL one. */
1127 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1128 {
1129   PetscErrorCode ierr;
1130   Mat            B = *newmat;
1131   Mat_SeqAIJMKL  *aijmkl;
1132   PetscBool      set;
1133   PetscBool      sametype;
1134 
1135   PetscFunctionBegin;
1136   if (reuse == MAT_INITIAL_MATRIX) {
1137     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1138   }
1139 
1140   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1141   if (sametype) PetscFunctionReturn(0);
1142 
1143   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1144   B->spptr = (void*) aijmkl;
1145 
1146   /* Set function pointers for methods that we inherit from AIJ but override.
1147    * We also parse some command line options below, since those determine some of the methods we point to. */
1148   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1149   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1150   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1151 
1152   aijmkl->sparse_optimized = PETSC_FALSE;
1153 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1154   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1155 #else
1156   aijmkl->no_SpMV2 = PETSC_TRUE;
1157 #endif
1158   aijmkl->eager_inspection = PETSC_FALSE;
1159 
1160   /* Parse command line options. */
1161   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1162   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1163   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1164   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1165 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1166   if (!aijmkl->no_SpMV2) {
1167     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1168     aijmkl->no_SpMV2 = PETSC_TRUE;
1169   }
1170 #endif
1171 
1172 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1173   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1174   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1175   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1176   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1177 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1178   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1179   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1180   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1181   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1182   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1183 #   if !defined(PETSC_USE_COMPLEX)
1184   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1185 #   else
1186   B->ops->ptapnumeric             = NULL;
1187 #   endif
1188 # endif
1189 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1190 
1191 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1192   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1193    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1194    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1195    * versions in which the older interface has not been deprecated, we use the old interface. */
1196   if (aijmkl->no_SpMV2) {
1197     B->ops->mult             = MatMult_SeqAIJMKL;
1198     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1199     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1200     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1201   }
1202 #endif
1203 
1204   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1205 
1206   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1207   *newmat = B;
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /*@C
1212    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1213    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1214    routines from Intel MKL whenever possible.
1215    If the installed version of MKL supports the "SpMV2" sparse
1216    inspector-executor routines, then those are used by default.
1217    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1218    symmetric A) operations are currently supported.
1219    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1220 
1221    Collective
1222 
1223    Input Parameters:
1224 +  comm - MPI communicator, set to PETSC_COMM_SELF
1225 .  m - number of rows
1226 .  n - number of columns
1227 .  nz - number of nonzeros per row (same for all rows)
1228 -  nnz - array containing the number of nonzeros in the various rows
1229          (possibly different for each row) or NULL
1230 
1231    Output Parameter:
1232 .  A - the matrix
1233 
1234    Options Database Keys:
1235 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1236 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1237 
1238    Notes:
1239    If nnz is given then nz is ignored
1240 
1241    Level: intermediate
1242 
1243 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1244 @*/
/* See the manual page above for full documentation of MatCreateSeqAIJMKL(). */
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* Sequential matrix: local and global sizes coincide. */
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  /* Preallocation is delegated to the underlying SeqAIJ implementation. */
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1256 
/* Type-registration constructor for MATSEQAIJMKL: builds a plain SEQAIJ matrix
 * and then converts it in place to the MKL-backed type. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1266