xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
28   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
29   /* so we will ignore 'MatType type'. */
30   Mat B = *newmat;
31 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
32   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
33 #endif
34 
35   PetscFunctionBegin;
36   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
37 
38   /* Reset the original function pointers. */
39   B->ops->duplicate               = MatDuplicate_SeqAIJ;
40   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
41   B->ops->destroy                 = MatDestroy_SeqAIJ;
42   B->ops->mult                    = MatMult_SeqAIJ;
43   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
44   B->ops->multadd                 = MatMultAdd_SeqAIJ;
45   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
46   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
47   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
48   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
49   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
52 
53   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
54 
55 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
56   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
57    * simply involves destroying the MKL sparse matrix handle and then freeing
58    * the spptr pointer. */
59   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
60 
61   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
62 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
63   PetscCall(PetscFree(B->spptr));
64 
65   /* Change the type of B to MATSEQAIJ. */
66   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
67 
68   *newmat = B;
69   PetscFunctionReturn(0);
70 }
71 
72 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) {
73   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
74 
75   PetscFunctionBegin;
76 
77   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
78   if (aijmkl) {
79     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
80 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
81     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
83     PetscCall(PetscFree(A->spptr));
84   }
85 
86   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
87    * to destroy everything that remains. */
88   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
89   /* Note that I don't call MatSetType().  I believe this is because that
90    * is only to be called when *building* a matrix.  I could be wrong, but
91    * that is how things work for the SuperLU matrix class. */
92   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
93   PetscCall(MatDestroy_SeqAIJ(A));
94   PetscFunctionReturn(0);
95 }
96 
97 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
98  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
99  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
100  * handle, creates a new one, and then calls mkl_sparse_optimize().
101  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
102  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
103  * an unoptimized matrix handle here. */
104 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) {
105 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
106   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
107    * does nothing. We make it callable anyway in this case because it cuts
108    * down on littering the code with #ifdefs. */
109   PetscFunctionBegin;
110   PetscFunctionReturn(0);
111 #else
112   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
113   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
114   PetscInt       m, n;
115   MatScalar     *aa;
116   PetscInt      *aj, *ai;
117 
118   PetscFunctionBegin;
119 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
120   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
121    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
122    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
123    * mkl_sparse_optimize() later. */
124   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
125 #endif
126 
127   if (aijmkl->sparse_optimized) {
128     /* Matrix has been previously assembled and optimized. Must destroy old
129      * matrix handle before running the optimization step again. */
130     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
131   }
132   aijmkl->sparse_optimized = PETSC_FALSE;
133 
134   /* Now perform the SpMV2 setup and matrix optimization. */
135   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
136   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
137   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
138   m                  = A->rmap->n;
139   n                  = A->cmap->n;
140   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
141   aa                 = a->a; /* Nonzero elements stored row-by-row. */
142   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
143   if (a->nz && aa && !A->structure_only) {
144     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
145      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
146     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
147     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
148     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
149     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
150     aijmkl->sparse_optimized = PETSC_TRUE;
151     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
152   } else {
153     aijmkl->csrA = PETSC_NULL;
154   }
155 
156   PetscFunctionReturn(0);
157 #endif
158 }
159 
160 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
161 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
162 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) {
163   sparse_index_base_t indexing;
164   PetscInt            m, n;
165   PetscInt           *aj, *ai, *dummy;
166   MatScalar          *aa;
167   Mat_SeqAIJMKL      *aijmkl;
168 
169   PetscFunctionBegin;
170   if (csrA) {
171     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
172     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
173     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
174   } else {
175     aj = ai = PETSC_NULL;
176     aa      = PETSC_NULL;
177   }
178 
179   PetscCall(MatSetType(A, MATSEQAIJ));
180   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
181   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
182    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
183    * they will be destroyed when the MKL handle is destroyed.
184    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
185   if (csrA) {
186     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
187   } else {
188     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
189     PetscCall(MatSetUp(A));
190     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
191     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
192   }
193 
194   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
195    * Now turn it into a MATSEQAIJMKL. */
196   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
197 
198   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
199   aijmkl->csrA = csrA;
200 
201   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
202    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
203    * and just need to be able to run the MKL optimization step. */
204   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
205   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
206   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
207   if (csrA) {
208     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
209     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
210   }
211   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
212   PetscFunctionReturn(0);
213 }
214 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
215 
216 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
217  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
218  * MatMatMultNumeric(). */
219 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
220 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) {
221   PetscInt            i;
222   PetscInt            nrows, ncols;
223   PetscInt            nz;
224   PetscInt           *ai, *aj, *dummy;
225   PetscScalar        *aa;
226   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
227   sparse_index_base_t indexing;
228 
229   PetscFunctionBegin;
230   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
231   if (!aijmkl->csrA) PetscFunctionReturn(0);
232 
233   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
234   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
235 
236   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
237    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
238   for (i = 0; i < nrows; i++) {
239     nz = ai[i + 1] - ai[i];
240     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
241   }
242 
243   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
244   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
245 
246   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
247   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
248    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
249    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
250   aijmkl->sparse_optimized = PETSC_FALSE;
251   PetscFunctionReturn(0);
252 }
253 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
254 
255 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
256 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) {
257   PetscInt            i, j, k;
258   PetscInt            nrows, ncols;
259   PetscInt            nz;
260   PetscInt           *ai, *aj, *dummy;
261   PetscScalar        *aa;
262   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
263   sparse_index_base_t indexing;
264 
265   PetscFunctionBegin;
266   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
267 
268   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
269   if (!aijmkl->csrA) {
270     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
271     PetscFunctionReturn(0);
272   }
273 
274   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
276 
277   k = 0;
278   for (i = 0; i < nrows; i++) {
279     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
280     nz = ai[i + 1] - ai[i];
281     for (j = 0; j < nz; j++) {
282       if (aa) {
283         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
284       } else {
285         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
286       }
287       k++;
288     }
289     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
290   }
291   PetscFunctionReturn(0);
292 }
293 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
294 
295 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) {
296   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
297   Mat_SeqAIJMKL *aijmkl_dest;
298 
299   PetscFunctionBegin;
300   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
301   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
302   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
303   aijmkl_dest->sparse_optimized = PETSC_FALSE;
304   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) {
309   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
310   Mat_SeqAIJMKL *aijmkl;
311 
312   PetscFunctionBegin;
313   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
314 
315   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
316    * extra information and some different methods, call the AssemblyEnd
317    * routine for a MATSEQAIJ.
318    * I'm not sure if this is the best way to do this, but it avoids
319    * a lot of code duplication. */
320   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
321   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
322 
323   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
324    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
325   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
326   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
327 
328   PetscFunctionReturn(0);
329 }
330 
331 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
332 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
333   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
334   const PetscScalar *x;
335   PetscScalar       *y;
336   const MatScalar   *aa;
337   PetscInt           m     = A->rmap->n;
338   PetscInt           n     = A->cmap->n;
339   PetscScalar        alpha = 1.0;
340   PetscScalar        beta  = 0.0;
341   const PetscInt    *aj, *ai;
342   char               matdescra[6];
343 
344   /* Variables not in MatMult_SeqAIJ. */
345   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
346 
347   PetscFunctionBegin;
348   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
349   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
350   PetscCall(VecGetArrayRead(xx, &x));
351   PetscCall(VecGetArray(yy, &y));
352   aj = a->j; /* aj[k] gives column index for element aa[k]. */
353   aa = a->a; /* Nonzero elements stored row-by-row. */
354   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
355 
356   /* Call MKL sparse BLAS routine to do the MatMult. */
357   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
358 
359   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
360   PetscCall(VecRestoreArrayRead(xx, &x));
361   PetscCall(VecRestoreArray(yy, &y));
362   PetscFunctionReturn(0);
363 }
364 #endif
365 
366 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
367 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
368   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
369   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
370   const PetscScalar *x;
371   PetscScalar       *y;
372   PetscObjectState   state;
373 
374   PetscFunctionBegin;
375 
376   /* If there are no nonzero entries, zero yy and return immediately. */
377   if (!a->nz) {
378     PetscCall(VecGetArray(yy, &y));
379     PetscCall(PetscArrayzero(y, A->rmap->n));
380     PetscCall(VecRestoreArray(yy, &y));
381     PetscFunctionReturn(0);
382   }
383 
384   PetscCall(VecGetArrayRead(xx, &x));
385   PetscCall(VecGetArray(yy, &y));
386 
387   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
388    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
389    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
390   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
391   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
392 
393   /* Call MKL SpMV2 executor routine to do the MatMult. */
394   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
395 
396   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
397   PetscCall(VecRestoreArrayRead(xx, &x));
398   PetscCall(VecRestoreArray(yy, &y));
399   PetscFunctionReturn(0);
400 }
401 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
402 
403 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
404 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) {
405   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
406   const PetscScalar *x;
407   PetscScalar       *y;
408   const MatScalar   *aa;
409   PetscInt           m     = A->rmap->n;
410   PetscInt           n     = A->cmap->n;
411   PetscScalar        alpha = 1.0;
412   PetscScalar        beta  = 0.0;
413   const PetscInt    *aj, *ai;
414   char               matdescra[6];
415 
416   /* Variables not in MatMultTranspose_SeqAIJ. */
417   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
418 
419   PetscFunctionBegin;
420   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
421   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
422   PetscCall(VecGetArrayRead(xx, &x));
423   PetscCall(VecGetArray(yy, &y));
424   aj = a->j; /* aj[k] gives column index for element aa[k]. */
425   aa = a->a; /* Nonzero elements stored row-by-row. */
426   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
427 
428   /* Call MKL sparse BLAS routine to do the MatMult. */
429   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
430 
431   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
432   PetscCall(VecRestoreArrayRead(xx, &x));
433   PetscCall(VecRestoreArray(yy, &y));
434   PetscFunctionReturn(0);
435 }
436 #endif
437 
438 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
439 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) {
440   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
441   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
442   const PetscScalar *x;
443   PetscScalar       *y;
444   PetscObjectState   state;
445 
446   PetscFunctionBegin;
447 
448   /* If there are no nonzero entries, zero yy and return immediately. */
449   if (!a->nz) {
450     PetscCall(VecGetArray(yy, &y));
451     PetscCall(PetscArrayzero(y, A->cmap->n));
452     PetscCall(VecRestoreArray(yy, &y));
453     PetscFunctionReturn(0);
454   }
455 
456   PetscCall(VecGetArrayRead(xx, &x));
457   PetscCall(VecGetArray(yy, &y));
458 
459   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
460    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
461    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
462   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
463   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
464 
465   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
466   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
467 
468   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
469   PetscCall(VecRestoreArrayRead(xx, &x));
470   PetscCall(VecRestoreArray(yy, &y));
471   PetscFunctionReturn(0);
472 }
473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
474 
475 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
476 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
477   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
478   const PetscScalar *x;
479   PetscScalar       *y, *z;
480   const MatScalar   *aa;
481   PetscInt           m = A->rmap->n;
482   PetscInt           n = A->cmap->n;
483   const PetscInt    *aj, *ai;
484   PetscInt           i;
485 
486   /* Variables not in MatMultAdd_SeqAIJ. */
487   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
488   PetscScalar alpha  = 1.0;
489   PetscScalar beta;
490   char        matdescra[6];
491 
492   PetscFunctionBegin;
493   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
494   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
495 
496   PetscCall(VecGetArrayRead(xx, &x));
497   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
498   aj = a->j; /* aj[k] gives column index for element aa[k]. */
499   aa = a->a; /* Nonzero elements stored row-by-row. */
500   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
501 
502   /* Call MKL sparse BLAS routine to do the MatMult. */
503   if (zz == yy) {
504     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
505     beta = 1.0;
506     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
507   } else {
508     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
509      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
510     beta = 0.0;
511     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
512     for (i = 0; i < m; i++) z[i] += y[i];
513   }
514 
515   PetscCall(PetscLogFlops(2.0 * a->nz));
516   PetscCall(VecRestoreArrayRead(xx, &x));
517   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
518   PetscFunctionReturn(0);
519 }
520 #endif
521 
522 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
523 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
524   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
525   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
526   const PetscScalar *x;
527   PetscScalar       *y, *z;
528   PetscInt           m = A->rmap->n;
529   PetscInt           i;
530 
531   /* Variables not in MatMultAdd_SeqAIJ. */
532   PetscObjectState state;
533 
534   PetscFunctionBegin;
535 
536   /* If there are no nonzero entries, set zz = yy and return immediately. */
537   if (!a->nz) {
538     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
539     PetscCall(PetscArraycpy(z, y, m));
540     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
541     PetscFunctionReturn(0);
542   }
543 
544   PetscCall(VecGetArrayRead(xx, &x));
545   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
546 
547   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
548    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
549    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
550   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
551   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
552 
553   /* Call MKL sparse BLAS routine to do the MatMult. */
554   if (zz == yy) {
555     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
556      * with alpha and beta both set to 1.0. */
557     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
558   } else {
559     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
560      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
561     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
562     for (i = 0; i < m; i++) z[i] += y[i];
563   }
564 
565   PetscCall(PetscLogFlops(2.0 * a->nz));
566   PetscCall(VecRestoreArrayRead(xx, &x));
567   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
568   PetscFunctionReturn(0);
569 }
570 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
571 
572 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
573 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) {
574   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
575   const PetscScalar *x;
576   PetscScalar       *y, *z;
577   const MatScalar   *aa;
578   PetscInt           m = A->rmap->n;
579   PetscInt           n = A->cmap->n;
580   const PetscInt    *aj, *ai;
581   PetscInt           i;
582 
583   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
584   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
585   PetscScalar alpha  = 1.0;
586   PetscScalar beta;
587   char        matdescra[6];
588 
589   PetscFunctionBegin;
590   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
591   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
592 
593   PetscCall(VecGetArrayRead(xx, &x));
594   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
595   aj = a->j; /* aj[k] gives column index for element aa[k]. */
596   aa = a->a; /* Nonzero elements stored row-by-row. */
597   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
598 
599   /* Call MKL sparse BLAS routine to do the MatMult. */
600   if (zz == yy) {
601     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
602     beta = 1.0;
603     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
604   } else {
605     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
606      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
607     beta = 0.0;
608     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
609     for (i = 0; i < n; i++) z[i] += y[i];
610   }
611 
612   PetscCall(PetscLogFlops(2.0 * a->nz));
613   PetscCall(VecRestoreArrayRead(xx, &x));
614   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
615   PetscFunctionReturn(0);
616 }
617 #endif
618 
619 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
620 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) {
621   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
622   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
623   const PetscScalar *x;
624   PetscScalar       *y, *z;
625   PetscInt           n = A->cmap->n;
626   PetscInt           i;
627   PetscObjectState   state;
628 
629   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
630 
631   PetscFunctionBegin;
632 
633   /* If there are no nonzero entries, set zz = yy and return immediately. */
634   if (!a->nz) {
635     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
636     PetscCall(PetscArraycpy(z, y, n));
637     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
638     PetscFunctionReturn(0);
639   }
640 
641   PetscCall(VecGetArrayRead(xx, &x));
642   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
643 
644   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
645    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
646    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
647   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
648   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
649 
650   /* Call MKL sparse BLAS routine to do the MatMult. */
651   if (zz == yy) {
652     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
653      * with alpha and beta both set to 1.0. */
654     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
655   } else {
656     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
657      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
658     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
659     for (i = 0; i < n; i++) z[i] += y[i];
660   }
661 
662   PetscCall(PetscLogFlops(2.0 * a->nz));
663   PetscCall(VecRestoreArrayRead(xx, &x));
664   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
665   PetscFunctionReturn(0);
666 }
667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668 
669 /* -------------------------- MatProduct code -------------------------- */
670 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
671 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
672   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
673   sparse_matrix_t     csrA, csrB, csrC;
674   PetscInt            nrows, ncols;
675   struct matrix_descr descr_type_gen;
676   PetscObjectState    state;
677 
678   PetscFunctionBegin;
679   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
680    * not handle sparse matrices with zero rows or columns. */
681   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
682   else nrows = A->cmap->N;
683   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
684   else ncols = B->rmap->N;
685 
686   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
687   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
688   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
689   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
690   csrA                = a->csrA;
691   csrB                = b->csrA;
692   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
693 
694   if (csrA && csrB) {
695     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
696   } else {
697     csrC = PETSC_NULL;
698   }
699 
700   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
701 
702   PetscFunctionReturn(0);
703 }
704 
705 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) {
706   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
707   sparse_matrix_t     csrA, csrB, csrC;
708   struct matrix_descr descr_type_gen;
709   PetscObjectState    state;
710 
711   PetscFunctionBegin;
712   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
713   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
714   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
715   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
716   csrA                = a->csrA;
717   csrB                = b->csrA;
718   csrC                = c->csrA;
719   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720 
721   if (csrA && csrB) {
722     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
723   } else {
724     csrC = PETSC_NULL;
725   }
726 
727   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
728   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
729 
730   PetscFunctionReturn(0);
731 }
732 
733 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
734   PetscFunctionBegin;
735   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
740   PetscFunctionBegin;
741   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
742   PetscFunctionReturn(0);
743 }
744 
745 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
746   PetscFunctionBegin;
747   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
752   PetscFunctionBegin;
753   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) {
758   PetscFunctionBegin;
759   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) {
764   PetscFunctionBegin;
765   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
766   PetscFunctionReturn(0);
767 }
768 
769 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
770   Mat_Product *product = C->product;
771   Mat          A = product->A, B = product->B;
772 
773   PetscFunctionBegin;
774   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
775   PetscFunctionReturn(0);
776 }
777 
778 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) {
779   Mat_Product *product = C->product;
780   Mat          A = product->A, B = product->B;
781   PetscReal    fill = product->fill;
782 
783   PetscFunctionBegin;
784   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
785   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) {
790   Mat                 Ct;
791   Vec                 zeros;
792   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
793   sparse_matrix_t     csrA, csrP, csrC;
794   PetscBool           set, flag;
795   struct matrix_descr descr_type_sym;
796   PetscObjectState    state;
797 
798   PetscFunctionBegin;
799   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
800   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
801 
802   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
803   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
804   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
805   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
806   csrA                = a->csrA;
807   csrP                = p->csrA;
808   csrC                = c->csrA;
809   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
810   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
811   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
812 
813   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
814   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
815 
816   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
817    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
818    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
819    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
820    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
821    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
822    * the full matrix. */
823   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
824   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
825   PetscCall(MatCreateVecs(C, &zeros, NULL));
826   PetscCall(VecSetFromOptions(zeros));
827   PetscCall(VecZeroEntries(zeros));
828   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
829   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
830   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
831   PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C));
832   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
833   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
834   PetscCall(VecDestroy(&zeros));
835   PetscCall(MatDestroy(&Ct));
836   PetscFunctionReturn(0);
837 }
838 
839 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) {
840   Mat_Product        *product = C->product;
841   Mat                 A = product->A, P = product->B;
842   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
843   sparse_matrix_t     csrA, csrP, csrC;
844   struct matrix_descr descr_type_sym;
845   PetscObjectState    state;
846 
847   PetscFunctionBegin;
848   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
849   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
850   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
851   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
852   csrA                = a->csrA;
853   csrP                = p->csrA;
854   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
855   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
856   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
857 
858   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
859   if (csrP && csrA) {
860     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
861   } else {
862     csrC = PETSC_NULL;
863   }
864 
865   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
866    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
867    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
868    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
869    * is guaranteed. */
870   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
871 
872   C->ops->productnumeric = MatProductNumeric_PtAP;
873   PetscFunctionReturn(0);
874 }
875 
876 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) {
877   PetscFunctionBegin;
878   C->ops->productsymbolic = MatProductSymbolic_AB;
879   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
880   PetscFunctionReturn(0);
881 }
882 
883 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) {
884   PetscFunctionBegin;
885   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) {
890   PetscFunctionBegin;
891   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
892   C->ops->productsymbolic          = MatProductSymbolic_ABt;
893   PetscFunctionReturn(0);
894 }
895 
896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) {
897   Mat_Product *product = C->product;
898   Mat          A       = product->A;
899   PetscBool    set, flag;
900 
901   PetscFunctionBegin;
902   if (PetscDefined(USE_COMPLEX)) {
903     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
904      * We do this in several other locations in this file. This works for the time being, but these
905      * routines are considered unsafe and may be removed from the MatProduct code in the future.
906      * TODO: Add proper MATSEQAIJMKL implementations */
907     C->ops->productsymbolic = NULL;
908   } else {
909     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
910     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
911     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
912     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
913     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
914      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
915   }
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) {
920   PetscFunctionBegin;
921   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
922   PetscFunctionReturn(0);
923 }
924 
925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) {
926   PetscFunctionBegin;
927   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
928   PetscFunctionReturn(0);
929 }
930 
931 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) {
932   Mat_Product *product = C->product;
933 
934   PetscFunctionBegin;
935   switch (product->type) {
936   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); break;
937   case MATPRODUCT_AtB: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); break;
938   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); break;
939   case MATPRODUCT_PtAP: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); break;
940   case MATPRODUCT_RARt: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); break;
941   case MATPRODUCT_ABC: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); break;
942   default: break;
943   }
944   PetscFunctionReturn(0);
945 }
946 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
947 /* ------------------------ End MatProduct code ------------------------ */
948 
949 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
950  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
951  * routine, but can also be used to convert an assembled SeqAIJ matrix
952  * into a SeqAIJMKL one. */
953 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
954   Mat            B = *newmat;
955   Mat_SeqAIJMKL *aijmkl;
956   PetscBool      set;
957   PetscBool      sametype;
958 
959   PetscFunctionBegin;
960   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
961 
962   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
963   if (sametype) PetscFunctionReturn(0);
964 
965   PetscCall(PetscNewLog(B, &aijmkl));
966   B->spptr = (void *)aijmkl;
967 
968   /* Set function pointers for methods that we inherit from AIJ but override.
969    * We also parse some command line options below, since those determine some of the methods we point to. */
970   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
971   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
972   B->ops->destroy     = MatDestroy_SeqAIJMKL;
973 
974   aijmkl->sparse_optimized = PETSC_FALSE;
975 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
976   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
977 #else
978   aijmkl->no_SpMV2 = PETSC_TRUE;
979 #endif
980   aijmkl->eager_inspection = PETSC_FALSE;
981 
982   /* Parse command line options. */
983   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
984   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
985   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
986   PetscOptionsEnd();
987 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
988   if (!aijmkl->no_SpMV2) {
989     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
990     aijmkl->no_SpMV2 = PETSC_TRUE;
991   }
992 #endif
993 
994 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
995   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
996   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
997   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
998   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
999 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1000   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1001   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1002   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1003   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1004   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1005 #if !defined(PETSC_USE_COMPLEX)
1006   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1007 #else
1008   B->ops->ptapnumeric = NULL;
1009 #endif
1010 #endif
1011 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1012 
1013 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1014   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1015    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1016    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1017    * versions in which the older interface has not been deprecated, we use the old interface. */
1018   if (aijmkl->no_SpMV2) {
1019     B->ops->mult             = MatMult_SeqAIJMKL;
1020     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1021     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1022     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1023   }
1024 #endif
1025 
1026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1027 
1028   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1029   *newmat = B;
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 /*@C
1034    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1035    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1036    routines from Intel MKL whenever possible.
1037    If the installed version of MKL supports the "SpMV2" sparse
1038    inspector-executor routines, then those are used by default.
1039    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1040    (for symmetric A) operations are currently supported.
1041    Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1042 
1043    Collective
1044 
1045    Input Parameters:
1046 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
1047 .  m - number of rows
1048 .  n - number of columns
1049 .  nz - number of nonzeros per row (same for all rows)
1050 -  nnz - array containing the number of nonzeros in the various rows
1051          (possibly different for each row) or NULL
1052 
1053    Output Parameter:
1054 .  A - the matrix
1055 
1056    Options Database Keys:
1057 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1058 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1059 
1060    Note:
1061    If nnz is given then nz is ignored
1062 
1063    Level: intermediate
1064 
1065 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1066 @*/
1067 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
1068   PetscFunctionBegin;
1069   PetscCall(MatCreate(comm, A));
1070   PetscCall(MatSetSizes(*A, m, n, m, n));
1071   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1072   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) {
1077   PetscFunctionBegin;
1078   PetscCall(MatSetType(A, MATSEQAIJ));
1079   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1080   PetscFunctionReturn(0);
1081 }
1082