xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   Mat B = *newmat;
31 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
32   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
33 #endif
34 
35   PetscFunctionBegin;
36   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
37 
38   /* Reset the original function pointers. */
39   B->ops->duplicate               = MatDuplicate_SeqAIJ;
40   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
41   B->ops->destroy                 = MatDestroy_SeqAIJ;
42   B->ops->mult                    = MatMult_SeqAIJ;
43   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
44   B->ops->multadd                 = MatMultAdd_SeqAIJ;
45   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
46   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
47   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
48   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
49   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
52 
53   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
54 
55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
56   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57    * simply involves destroying the MKL sparse matrix handle and then freeing
58    * the spptr pointer. */
59   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
60 
61   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
63   PetscCall(PetscFree(B->spptr));
64 
65   /* Change the type of B to MATSEQAIJ. */
66   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
67 
68   *newmat = B;
69   PetscFunctionReturn(0);
70 }
71 
72 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) {
73   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
74 
75   PetscFunctionBegin;
76 
77   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
78   if (aijmkl) {
79     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
80 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
81     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
83     PetscCall(PetscFree(A->spptr));
84   }
85 
86   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
87    * to destroy everything that remains. */
88   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
89   /* Note that I don't call MatSetType().  I believe this is because that
90    * is only to be called when *building* a matrix.  I could be wrong, but
91    * that is how things work for the SuperLU matrix class. */
92   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
93   PetscCall(MatDestroy_SeqAIJ(A));
94   PetscFunctionReturn(0);
95 }
96 
97 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
98  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
99  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
100  * handle, creates a new one, and then calls mkl_sparse_optimize().
101  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
102  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
103  * an unoptimized matrix handle here. */
104 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) {
105 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
106   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
107    * does nothing. We make it callable anyway in this case because it cuts
108    * down on littering the code with #ifdefs. */
109   PetscFunctionBegin;
110   PetscFunctionReturn(0);
111 #else
112   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
113   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
114   PetscInt       m, n;
115   MatScalar     *aa;
116   PetscInt      *aj, *ai;
117 
118   PetscFunctionBegin;
119 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
120   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
121    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
122    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
123    * mkl_sparse_optimize() later. */
124   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
125 #endif
126 
127   if (aijmkl->sparse_optimized) {
128     /* Matrix has been previously assembled and optimized. Must destroy old
129      * matrix handle before running the optimization step again. */
130     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
131   }
132   aijmkl->sparse_optimized = PETSC_FALSE;
133 
134   /* Now perform the SpMV2 setup and matrix optimization. */
135   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
136   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
137   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
138   m                  = A->rmap->n;
139   n                  = A->cmap->n;
140   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
141   aa                 = a->a; /* Nonzero elements stored row-by-row. */
142   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
143   if (a->nz && aa && !A->structure_only) {
144     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
145      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
146     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
147     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
148     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
149     if (!aijmkl->no_SpMV2) { PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); }
150     aijmkl->sparse_optimized = PETSC_TRUE;
151     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
152   } else {
153     aijmkl->csrA = PETSC_NULL;
154   }
155 
156   PetscFunctionReturn(0);
157 #endif
158 }
159 
160 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
161 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
162 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) {
163   sparse_index_base_t indexing;
164   PetscInt            m, n;
165   PetscInt           *aj, *ai, *dummy;
166   MatScalar          *aa;
167   Mat_SeqAIJMKL      *aijmkl;
168 
169   PetscFunctionBegin;
170   if (csrA) {
171     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
172     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
173     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
174   } else {
175     aj = ai = PETSC_NULL;
176     aa      = PETSC_NULL;
177   }
178 
179   PetscCall(MatSetType(A, MATSEQAIJ));
180   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
181   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
182    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
183    * they will be destroyed when the MKL handle is destroyed.
184    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
185   if (csrA) {
186     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
187   } else {
188     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
189     PetscCall(MatSetUp(A));
190     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
191     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
192   }
193 
194   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
195    * Now turn it into a MATSEQAIJMKL. */
196   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
197 
198   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
199   aijmkl->csrA = csrA;
200 
201   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
202    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
203    * and just need to be able to run the MKL optimization step. */
204   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
205   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
206   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
207   if (csrA) {
208     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
209     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
210   }
211   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
212   PetscFunctionReturn(0);
213 }
214 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
215 
216 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
217  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
218  * MatMatMultNumeric(). */
219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
220 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) {
221   PetscInt            i;
222   PetscInt            nrows, ncols;
223   PetscInt            nz;
224   PetscInt           *ai, *aj, *dummy;
225   PetscScalar        *aa;
226   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
227   sparse_index_base_t indexing;
228 
229   PetscFunctionBegin;
230   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
231   if (!aijmkl->csrA) PetscFunctionReturn(0);
232 
233   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
234   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
235 
236   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
237    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
238   for (i = 0; i < nrows; i++) {
239     nz = ai[i + 1] - ai[i];
240     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
241   }
242 
243   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
244   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
245 
246   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
247   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
248    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
249    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
250   aijmkl->sparse_optimized = PETSC_FALSE;
251   PetscFunctionReturn(0);
252 }
253 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
254 
255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
256 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) {
257   PetscInt            i, j, k;
258   PetscInt            nrows, ncols;
259   PetscInt            nz;
260   PetscInt           *ai, *aj, *dummy;
261   PetscScalar        *aa;
262   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
263   sparse_index_base_t indexing;
264 
265   PetscFunctionBegin;
266   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
267 
268   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
269   if (!aijmkl->csrA) {
270     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
271     PetscFunctionReturn(0);
272   }
273 
274   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
276 
277   k = 0;
278   for (i = 0; i < nrows; i++) {
279     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
280     nz = ai[i + 1] - ai[i];
281     for (j = 0; j < nz; j++) {
282       if (aa) {
283         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
284       } else {
285         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
286       }
287       k++;
288     }
289     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
290   }
291   PetscFunctionReturn(0);
292 }
293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294 
295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) {
296   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
297   Mat_SeqAIJMKL *aijmkl_dest;
298 
299   PetscFunctionBegin;
300   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
301   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
302   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
303   aijmkl_dest->sparse_optimized = PETSC_FALSE;
304   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) {
309   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
310   Mat_SeqAIJMKL *aijmkl;
311 
312   PetscFunctionBegin;
313   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
314 
315   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
316    * extra information and some different methods, call the AssemblyEnd
317    * routine for a MATSEQAIJ.
318    * I'm not sure if this is the best way to do this, but it avoids
319    * a lot of code duplication. */
320   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
321   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
322 
323   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
324    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
325   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
326   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
327 
328   PetscFunctionReturn(0);
329 }
330 
331 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
332 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
333   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
334   const PetscScalar *x;
335   PetscScalar       *y;
336   const MatScalar   *aa;
337   PetscInt           m     = A->rmap->n;
338   PetscInt           n     = A->cmap->n;
339   PetscScalar        alpha = 1.0;
340   PetscScalar        beta  = 0.0;
341   const PetscInt    *aj, *ai;
342   char               matdescra[6];
343 
344   /* Variables not in MatMult_SeqAIJ. */
345   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
346 
347   PetscFunctionBegin;
348   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
349   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
350   PetscCall(VecGetArrayRead(xx, &x));
351   PetscCall(VecGetArray(yy, &y));
352   aj = a->j; /* aj[k] gives column index for element aa[k]. */
353   aa = a->a; /* Nonzero elements stored row-by-row. */
354   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
355 
356   /* Call MKL sparse BLAS routine to do the MatMult. */
357   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
358 
359   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
360   PetscCall(VecRestoreArrayRead(xx, &x));
361   PetscCall(VecRestoreArray(yy, &y));
362   PetscFunctionReturn(0);
363 }
364 #endif
365 
366 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
367 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
368   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
369   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
370   const PetscScalar *x;
371   PetscScalar       *y;
372   PetscObjectState   state;
373 
374   PetscFunctionBegin;
375 
376   /* If there are no nonzero entries, zero yy and return immediately. */
377   if (!a->nz) {
378     PetscCall(VecGetArray(yy, &y));
379     PetscCall(PetscArrayzero(y, A->rmap->n));
380     PetscCall(VecRestoreArray(yy, &y));
381     PetscFunctionReturn(0);
382   }
383 
384   PetscCall(VecGetArrayRead(xx, &x));
385   PetscCall(VecGetArray(yy, &y));
386 
387   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
388    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
389    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
390   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
391   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
392 
393   /* Call MKL SpMV2 executor routine to do the MatMult. */
394   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
395 
396   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
397   PetscCall(VecRestoreArrayRead(xx, &x));
398   PetscCall(VecRestoreArray(yy, &y));
399   PetscFunctionReturn(0);
400 }
401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402 
403 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
404 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
405   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
406   const PetscScalar *x;
407   PetscScalar       *y;
408   const MatScalar   *aa;
409   PetscInt           m     = A->rmap->n;
410   PetscInt           n     = A->cmap->n;
411   PetscScalar        alpha = 1.0;
412   PetscScalar        beta  = 0.0;
413   const PetscInt    *aj, *ai;
414   char               matdescra[6];
415 
416   /* Variables not in MatMultTranspose_SeqAIJ. */
417   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
418 
419   PetscFunctionBegin;
420   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
421   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
422   PetscCall(VecGetArrayRead(xx, &x));
423   PetscCall(VecGetArray(yy, &y));
424   aj = a->j; /* aj[k] gives column index for element aa[k]. */
425   aa = a->a; /* Nonzero elements stored row-by-row. */
426   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
427 
428   /* Call MKL sparse BLAS routine to do the MatMult. */
429   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
430 
431   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
432   PetscCall(VecRestoreArrayRead(xx, &x));
433   PetscCall(VecRestoreArray(yy, &y));
434   PetscFunctionReturn(0);
435 }
436 #endif
437 
438 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
440   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
441   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
442   const PetscScalar *x;
443   PetscScalar       *y;
444   PetscObjectState   state;
445 
446   PetscFunctionBegin;
447 
448   /* If there are no nonzero entries, zero yy and return immediately. */
449   if (!a->nz) {
450     PetscCall(VecGetArray(yy, &y));
451     PetscCall(PetscArrayzero(y, A->cmap->n));
452     PetscCall(VecRestoreArray(yy, &y));
453     PetscFunctionReturn(0);
454   }
455 
456   PetscCall(VecGetArrayRead(xx, &x));
457   PetscCall(VecGetArray(yy, &y));
458 
459   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
460    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
461    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
462   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
463   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
464 
465   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
466   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
467 
468   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
469   PetscCall(VecRestoreArrayRead(xx, &x));
470   PetscCall(VecRestoreArray(yy, &y));
471   PetscFunctionReturn(0);
472 }
473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
474 
475 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
476 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
477   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
478   const PetscScalar *x;
479   PetscScalar       *y, *z;
480   const MatScalar   *aa;
481   PetscInt           m = A->rmap->n;
482   PetscInt           n = A->cmap->n;
483   const PetscInt    *aj, *ai;
484   PetscInt           i;
485 
486   /* Variables not in MatMultAdd_SeqAIJ. */
487   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
488   PetscScalar alpha  = 1.0;
489   PetscScalar beta;
490   char        matdescra[6];
491 
492   PetscFunctionBegin;
493   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
494   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
495 
496   PetscCall(VecGetArrayRead(xx, &x));
497   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
498   aj = a->j; /* aj[k] gives column index for element aa[k]. */
499   aa = a->a; /* Nonzero elements stored row-by-row. */
500   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
501 
502   /* Call MKL sparse BLAS routine to do the MatMult. */
503   if (zz == yy) {
504     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
505     beta = 1.0;
506     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
507   } else {
508     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
509      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
510     beta = 0.0;
511     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
512     for (i = 0; i < m; i++) { z[i] += y[i]; }
513   }
514 
515   PetscCall(PetscLogFlops(2.0 * a->nz));
516   PetscCall(VecRestoreArrayRead(xx, &x));
517   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
518   PetscFunctionReturn(0);
519 }
520 #endif
521 
522 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
523 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
524   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
525   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
526   const PetscScalar *x;
527   PetscScalar       *y, *z;
528   PetscInt           m = A->rmap->n;
529   PetscInt           i;
530 
531   /* Variables not in MatMultAdd_SeqAIJ. */
532   PetscObjectState state;
533 
534   PetscFunctionBegin;
535 
536   /* If there are no nonzero entries, set zz = yy and return immediately. */
537   if (!a->nz) {
538     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
539     PetscCall(PetscArraycpy(z, y, m));
540     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
541     PetscFunctionReturn(0);
542   }
543 
544   PetscCall(VecGetArrayRead(xx, &x));
545   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
546 
547   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
548    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
549    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
550   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
551   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
552 
553   /* Call MKL sparse BLAS routine to do the MatMult. */
554   if (zz == yy) {
555     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
556      * with alpha and beta both set to 1.0. */
557     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
558   } else {
559     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
560      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
561     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
562     for (i = 0; i < m; i++) z[i] += y[i];
563   }
564 
565   PetscCall(PetscLogFlops(2.0 * a->nz));
566   PetscCall(VecRestoreArrayRead(xx, &x));
567   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
568   PetscFunctionReturn(0);
569 }
570 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
571 
572 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
573 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
574   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
575   const PetscScalar *x;
576   PetscScalar       *y, *z;
577   const MatScalar   *aa;
578   PetscInt           m = A->rmap->n;
579   PetscInt           n = A->cmap->n;
580   const PetscInt    *aj, *ai;
581   PetscInt           i;
582 
583   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
584   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
585   PetscScalar alpha  = 1.0;
586   PetscScalar beta;
587   char        matdescra[6];
588 
589   PetscFunctionBegin;
590   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
591   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
592 
593   PetscCall(VecGetArrayRead(xx, &x));
594   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
595   aj = a->j; /* aj[k] gives column index for element aa[k]. */
596   aa = a->a; /* Nonzero elements stored row-by-row. */
597   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
598 
599   /* Call MKL sparse BLAS routine to do the MatMult. */
600   if (zz == yy) {
601     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
602     beta = 1.0;
603     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
604   } else {
605     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
606      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
607     beta = 0.0;
608     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
609     for (i = 0; i < n; i++) { z[i] += y[i]; }
610   }
611 
612   PetscCall(PetscLogFlops(2.0 * a->nz));
613   PetscCall(VecRestoreArrayRead(xx, &x));
614   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
615   PetscFunctionReturn(0);
616 }
617 #endif
618 
619 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
620 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
621   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
622   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
623   const PetscScalar *x;
624   PetscScalar       *y, *z;
625   PetscInt           n = A->cmap->n;
626   PetscInt           i;
627   PetscObjectState   state;
628 
629   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
630 
631   PetscFunctionBegin;
632 
633   /* If there are no nonzero entries, set zz = yy and return immediately. */
634   if (!a->nz) {
635     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
636     PetscCall(PetscArraycpy(z, y, n));
637     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
638     PetscFunctionReturn(0);
639   }
640 
641   PetscCall(VecGetArrayRead(xx, &x));
642   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
643 
644   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
645    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
646    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
647   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
648   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
649 
650   /* Call MKL sparse BLAS routine to do the MatMult. */
651   if (zz == yy) {
652     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
653      * with alpha and beta both set to 1.0. */
654     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
655   } else {
656     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
657      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
658     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
659     for (i = 0; i < n; i++) z[i] += y[i];
660   }
661 
662   PetscCall(PetscLogFlops(2.0 * a->nz));
663   PetscCall(VecRestoreArrayRead(xx, &x));
664   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
665   PetscFunctionReturn(0);
666 }
667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668 
669 /* -------------------------- MatProduct code -------------------------- */
670 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
671 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
672   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
673   sparse_matrix_t     csrA, csrB, csrC;
674   PetscInt            nrows, ncols;
675   struct matrix_descr descr_type_gen;
676   PetscObjectState    state;
677 
678   PetscFunctionBegin;
679   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
680    * not handle sparse matrices with zero rows or columns. */
681   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
682   else nrows = A->cmap->N;
683   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
684   else ncols = B->rmap->N;
685 
686   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
687   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
688   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
689   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
690   csrA                = a->csrA;
691   csrB                = b->csrA;
692   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
693 
694   if (csrA && csrB) {
695     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
696   } else {
697     csrC = PETSC_NULL;
698   }
699 
700   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
701 
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
706   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
707   sparse_matrix_t     csrA, csrB, csrC;
708   struct matrix_descr descr_type_gen;
709   PetscObjectState    state;
710 
711   PetscFunctionBegin;
712   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
713   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
714   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
715   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
716   csrA                = a->csrA;
717   csrB                = b->csrA;
718   csrC                = c->csrA;
719   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720 
721   if (csrA && csrB) {
722     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
723   } else {
724     csrC = PETSC_NULL;
725   }
726 
727   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
728   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
729 
730   PetscFunctionReturn(0);
731 }
732 
733 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
734   PetscFunctionBegin;
735   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
740   PetscFunctionBegin;
741   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
742   PetscFunctionReturn(0);
743 }
744 
745 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
746   PetscFunctionBegin;
747   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
752   PetscFunctionBegin;
753   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
758   PetscFunctionBegin;
759   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
764   PetscFunctionBegin;
765   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
766   PetscFunctionReturn(0);
767 }
768 
769 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
770   Mat_Product *product = C->product;
771   Mat          A = product->A, B = product->B;
772 
773   PetscFunctionBegin;
774   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
775   PetscFunctionReturn(0);
776 }
777 
778 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
779   Mat_Product *product = C->product;
780   Mat          A = product->A, B = product->B;
781   PetscReal    fill = product->fill;
782 
783   PetscFunctionBegin;
784   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
785   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) {
790   Mat                 Ct;
791   Vec                 zeros;
792   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
793   sparse_matrix_t     csrA, csrP, csrC;
794   PetscBool           set, flag;
795   struct matrix_descr descr_type_sym;
796   PetscObjectState    state;
797 
798   PetscFunctionBegin;
799   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
800   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
801 
802   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
803   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
804   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
805   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
806   csrA                = a->csrA;
807   csrP                = p->csrA;
808   csrC                = c->csrA;
809   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
810   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
811   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
812 
813   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
814   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
815 
816   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
817    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
818    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
819    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
820    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
821    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
822    * the full matrix. */
823   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
824   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
825   PetscCall(MatCreateVecs(C, &zeros, NULL));
826   PetscCall(VecSetFromOptions(zeros));
827   PetscCall(VecZeroEntries(zeros));
828   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
829   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
830   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
831   PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C));
832   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
833   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
834   PetscCall(VecDestroy(&zeros));
835   PetscCall(MatDestroy(&Ct));
836   PetscFunctionReturn(0);
837 }
838 
839 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) {
840   Mat_Product        *product = C->product;
841   Mat                 A = product->A, P = product->B;
842   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
843   sparse_matrix_t     csrA, csrP, csrC;
844   struct matrix_descr descr_type_sym;
845   PetscObjectState    state;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
849   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
850   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
851   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
852   csrA                = a->csrA;
853   csrP                = p->csrA;
854   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
855   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
856   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
857 
858   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
859   if (csrP && csrA) {
860     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
861   } else {
862     csrC = PETSC_NULL;
863   }
864 
865   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
866    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
867    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
868    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
869    * is guaranteed. */
870   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
871 
872   C->ops->productnumeric = MatProductNumeric_PtAP;
873   PetscFunctionReturn(0);
874 }
875 
876 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) {
877   PetscFunctionBegin;
878   C->ops->productsymbolic = MatProductSymbolic_AB;
879   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
880   PetscFunctionReturn(0);
881 }
882 
883 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) {
884   PetscFunctionBegin;
885   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) {
890   PetscFunctionBegin;
891   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
892   C->ops->productsymbolic          = MatProductSymbolic_ABt;
893   PetscFunctionReturn(0);
894 }
895 
896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) {
897   Mat_Product *product = C->product;
898   Mat          A       = product->A;
899   PetscBool    set, flag;
900 
901   PetscFunctionBegin;
902   if (PetscDefined(USE_COMPLEX)) {
903     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
904      * We do this in several other locations in this file. This works for the time being, but these
905      * routines are considered unsafe and may be removed from the MatProduct code in the future.
906      * TODO: Add proper MATSEQAIJMKL implementations */
907     C->ops->productsymbolic = NULL;
908   } else {
909     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
910     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
911     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
912     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
913     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
914      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
915   }
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) {
920   PetscFunctionBegin;
921   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
922   PetscFunctionReturn(0);
923 }
924 
925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) {
926   PetscFunctionBegin;
927   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
928   PetscFunctionReturn(0);
929 }
930 
931 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) {
932   Mat_Product *product = C->product;
933 
934   PetscFunctionBegin;
935   switch (product->type) {
936   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break;
937   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break;
938   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break;
939   case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break;
940   case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break;
941   case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break;
942   default: break;
943   }
944   PetscFunctionReturn(0);
945 }
946 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
947 /* ------------------------ End MatProduct code ------------------------ */
948 
949 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
950  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
951  * routine, but can also be used to convert an assembled SeqAIJ matrix
952  * into a SeqAIJMKL one. */
953 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
954   Mat            B = *newmat;
955   Mat_SeqAIJMKL *aijmkl;
956   PetscBool      set;
957   PetscBool      sametype;
958 
959   PetscFunctionBegin;
960   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
961 
962   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
963   if (sametype) PetscFunctionReturn(0);
964 
965   PetscCall(PetscNewLog(B, &aijmkl));
966   B->spptr = (void *)aijmkl;
967 
968   /* Set function pointers for methods that we inherit from AIJ but override.
969    * We also parse some command line options below, since those determine some of the methods we point to. */
970   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
971   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
972   B->ops->destroy     = MatDestroy_SeqAIJMKL;
973 
974   aijmkl->sparse_optimized = PETSC_FALSE;
975 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
976   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
977 #else
978   aijmkl->no_SpMV2 = PETSC_TRUE;
979 #endif
980   aijmkl->eager_inspection = PETSC_FALSE;
981 
982   /* Parse command line options. */
983   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
984   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
985   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
986   PetscOptionsEnd();
987 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
988   if (!aijmkl->no_SpMV2) {
989     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
990     aijmkl->no_SpMV2 = PETSC_TRUE;
991   }
992 #endif
993 
994 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
995   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
996   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
997   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
998   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
999 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1000   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1001   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1002   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1003   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1004   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1005 #if !defined(PETSC_USE_COMPLEX)
1006   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1007 #else
1008   B->ops->ptapnumeric = NULL;
1009 #endif
1010 #endif
1011 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1012 
1013 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1014   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1015    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1016    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1017    * versions in which the older interface has not been deprecated, we use the old interface. */
1018   if (aijmkl->no_SpMV2) {
1019     B->ops->mult             = MatMult_SeqAIJMKL;
1020     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1021     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1022     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1023   }
1024 #endif
1025 
1026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1027 
1028   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1029   *newmat = B;
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 /*@C
1034    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1035    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1036    routines from Intel MKL whenever possible.
1037    If the installed version of MKL supports the "SpMV2" sparse
1038    inspector-executor routines, then those are used by default.
1039    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1040    symmetric A) operations are currently supported.
1041    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1042 
1043    Collective
1044 
1045    Input Parameters:
1046 +  comm - MPI communicator, set to PETSC_COMM_SELF
1047 .  m - number of rows
1048 .  n - number of columns
1049 .  nz - number of nonzeros per row (same for all rows)
1050 -  nnz - array containing the number of nonzeros in the various rows
1051          (possibly different for each row) or NULL
1052 
1053    Output Parameter:
1054 .  A - the matrix
1055 
1056    Options Database Keys:
1057 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1058 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1059 
1060    Notes:
1061    If nnz is given then nz is ignored
1062 
1063    Level: intermediate
1064 
1065 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1066 @*/
1067 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
1068   PetscFunctionBegin;
1069   PetscCall(MatCreate(comm, A));
1070   PetscCall(MatSetSizes(*A, m, n, m, n));
1071   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1072   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) {
1077   PetscFunctionBegin;
1078   PetscCall(MatSetType(A, MATSEQAIJ));
1079   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1080   PetscFunctionReturn(0);
1081 }
1082