xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
13   #define MKL_ILP64
14 #endif
15 #include <mkl_spblas.h>
16 
17 typedef struct {
18   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
19   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
20   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
21   PetscObjectState state;
22 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
23   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
24   struct matrix_descr descr;
25 #endif
26 } Mat_SeqAIJMKL;
27 
28 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
29 
30 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
31 {
32   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
33   /* so we will ignore 'MatType type'. */
34   Mat B = *newmat;
35 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
36   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
37 #endif
38 
39   PetscFunctionBegin;
40   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61    * simply involves destroying the MKL sparse matrix handle and then freeing
62    * the spptr pointer. */
63   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
64 
65   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
66 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
67   PetscCall(PetscFree(B->spptr));
68 
69   /* Change the type of B to MATSEQAIJ. */
70   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
71 
72   *newmat = B;
73   PetscFunctionReturn(PETSC_SUCCESS);
74 }
75 
76 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
77 {
78   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
79 
80   PetscFunctionBegin;
81 
82   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
83   if (aijmkl) {
84     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
85 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
86     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
87 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
88     PetscCall(PetscFree(A->spptr));
89   }
90 
91   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
92    * to destroy everything that remains. */
93   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
94   /* I don't call MatSetType().  I believe this is because that
95    * is only to be called when *building* a matrix.  I could be wrong, but
96    * that is how things work for the SuperLU matrix class. */
97   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
98   PetscCall(MatDestroy_SeqAIJ(A));
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
103  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
104  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
105  * handle, creates a new one, and then calls mkl_sparse_optimize().
106  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
107  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
108  * an unoptimized matrix handle here. */
109 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
110 {
111 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
112   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
113    * does nothing. We make it callable anyway in this case because it cuts
114    * down on littering the code with #ifdefs. */
115   PetscFunctionBegin;
116   PetscFunctionReturn(PETSC_SUCCESS);
117 #else
118   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
119   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
120   PetscInt       m, n;
121   MatScalar     *aa;
122   PetscInt      *aj, *ai;
123 
124   PetscFunctionBegin;
125   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
126   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
127    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
128    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
129    * mkl_sparse_optimize() later. */
130   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
131   #endif
132 
133   if (aijmkl->sparse_optimized) {
134     /* Matrix has been previously assembled and optimized. Must destroy old
135      * matrix handle before running the optimization step again. */
136     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
137   }
138   aijmkl->sparse_optimized = PETSC_FALSE;
139 
140   /* Now perform the SpMV2 setup and matrix optimization. */
141   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
142   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
143   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
144   m                  = A->rmap->n;
145   n                  = A->cmap->n;
146   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
147   aa                 = a->a; /* Nonzero elements stored row-by-row. */
148   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
149   if (a->nz && aa && !A->structure_only) {
150     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
151      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
152     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
153     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
154     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
155     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
156     aijmkl->sparse_optimized = PETSC_TRUE;
157     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
158   } else {
159     aijmkl->csrA = NULL;
160   }
161 
162   PetscFunctionReturn(PETSC_SUCCESS);
163 #endif
164 }
165 
166 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
167 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
168 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
169 {
170   sparse_index_base_t indexing;
171   PetscInt            m, n;
172   PetscInt           *aj, *ai, *dummy;
173   MatScalar          *aa;
174   Mat_SeqAIJMKL      *aijmkl;
175 
176   PetscFunctionBegin;
177   if (csrA) {
178     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
179     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
180     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
181   } else {
182     aj = ai = NULL;
183     aa      = NULL;
184   }
185 
186   PetscCall(MatSetType(A, MATSEQAIJ));
187   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
188   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
189    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
190    * they will be destroyed when the MKL handle is destroyed.
191    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
192   if (csrA) {
193     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
194   } else {
195     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
196     PetscCall(MatSetUp(A));
197     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
198     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
199   }
200 
201   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
202    * Now turn it into a MATSEQAIJMKL. */
203   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
204 
205   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
206   aijmkl->csrA = csrA;
207 
208   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
209    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
210    * and just need to be able to run the MKL optimization step. */
211   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
212   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
213   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
214   if (csrA) {
215     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
216     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
217   }
218   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
219   PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
222 
223 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
224  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
225  * MatMatMultNumeric(). */
226 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
227 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
228 {
229   PetscInt            i;
230   PetscInt            nrows, ncols;
231   PetscInt            nz;
232   PetscInt           *ai, *aj, *dummy;
233   PetscScalar        *aa;
234   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
235   sparse_index_base_t indexing;
236 
237   PetscFunctionBegin;
238   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
239   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
240 
241   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
242   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
243 
244   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
245    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
246   for (i = 0; i < nrows; i++) {
247     nz = ai[i + 1] - ai[i];
248     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
249   }
250 
251   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
252   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
253 
254   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
255   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
256    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
257    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
258   aijmkl->sparse_optimized = PETSC_FALSE;
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
262 
263 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
264 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
265 {
266   PetscInt            i, j, k;
267   PetscInt            nrows, ncols;
268   PetscInt            nz;
269   PetscInt           *ai, *aj, *dummy;
270   PetscScalar        *aa;
271   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
272   sparse_index_base_t indexing;
273 
274   PetscFunctionBegin;
275   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
276 
277   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
278   if (!aijmkl->csrA) {
279     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
280     PetscFunctionReturn(PETSC_SUCCESS);
281   }
282 
283   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
284   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
285 
286   k = 0;
287   for (i = 0; i < nrows; i++) {
288     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
289     nz = ai[i + 1] - ai[i];
290     for (j = 0; j < nz; j++) {
291       if (aa) {
292         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
293       } else {
294         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
295       }
296       k++;
297     }
298     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
299   }
300   PetscFunctionReturn(PETSC_SUCCESS);
301 }
302 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
303 
304 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
305 {
306   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
307   Mat_SeqAIJMKL *aijmkl_dest;
308 
309   PetscFunctionBegin;
310   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
311   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
312   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
313   aijmkl_dest->sparse_optimized = PETSC_FALSE;
314   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
319 {
320   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
321   Mat_SeqAIJMKL *aijmkl;
322 
323   PetscFunctionBegin;
324   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
325 
326   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
327    * extra information and some different methods, call the AssemblyEnd
328    * routine for a MATSEQAIJ.
329    * I'm not sure if this is the best way to do this, but it avoids
330    * a lot of code duplication. */
331   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
332   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
333 
334   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
335    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
336   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
337   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
338 
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
343 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
344 {
345   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
346   const PetscScalar *x;
347   PetscScalar       *y;
348   const MatScalar   *aa;
349   PetscInt           m     = A->rmap->n;
350   PetscInt           n     = A->cmap->n;
351   PetscScalar        alpha = 1.0;
352   PetscScalar        beta  = 0.0;
353   const PetscInt    *aj, *ai;
354   char               matdescra[6];
355 
356   /* Variables not in MatMult_SeqAIJ. */
357   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
358 
359   PetscFunctionBegin;
360   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
361   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
362   PetscCall(VecGetArrayRead(xx, &x));
363   PetscCall(VecGetArray(yy, &y));
364   aj = a->j; /* aj[k] gives column index for element aa[k]. */
365   aa = a->a; /* Nonzero elements stored row-by-row. */
366   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
367 
368   /* Call MKL sparse BLAS routine to do the MatMult. */
369   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
370 
371   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
372   PetscCall(VecRestoreArrayRead(xx, &x));
373   PetscCall(VecRestoreArray(yy, &y));
374   PetscFunctionReturn(PETSC_SUCCESS);
375 }
376 #endif
377 
378 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
379 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
380 {
381   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
382   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
383   const PetscScalar *x;
384   PetscScalar       *y;
385   PetscObjectState   state;
386 
387   PetscFunctionBegin;
388 
389   /* If there are no nonzero entries, zero yy and return immediately. */
390   if (!a->nz) {
391     PetscCall(VecGetArray(yy, &y));
392     PetscCall(PetscArrayzero(y, A->rmap->n));
393     PetscCall(VecRestoreArray(yy, &y));
394     PetscFunctionReturn(PETSC_SUCCESS);
395   }
396 
397   PetscCall(VecGetArrayRead(xx, &x));
398   PetscCall(VecGetArray(yy, &y));
399 
400   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
401    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
402    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
403   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
404   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
405 
406   /* Call MKL SpMV2 executor routine to do the MatMult. */
407   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
408 
409   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
410   PetscCall(VecRestoreArrayRead(xx, &x));
411   PetscCall(VecRestoreArray(yy, &y));
412   PetscFunctionReturn(PETSC_SUCCESS);
413 }
414 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
415 
416 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
417 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
418 {
419   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
420   const PetscScalar *x;
421   PetscScalar       *y;
422   const MatScalar   *aa;
423   PetscInt           m     = A->rmap->n;
424   PetscInt           n     = A->cmap->n;
425   PetscScalar        alpha = 1.0;
426   PetscScalar        beta  = 0.0;
427   const PetscInt    *aj, *ai;
428   char               matdescra[6];
429 
430   /* Variables not in MatMultTranspose_SeqAIJ. */
431   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
432 
433   PetscFunctionBegin;
434   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
435   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
436   PetscCall(VecGetArrayRead(xx, &x));
437   PetscCall(VecGetArray(yy, &y));
438   aj = a->j; /* aj[k] gives column index for element aa[k]. */
439   aa = a->a; /* Nonzero elements stored row-by-row. */
440   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
441 
442   /* Call MKL sparse BLAS routine to do the MatMult. */
443   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
444 
445   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
446   PetscCall(VecRestoreArrayRead(xx, &x));
447   PetscCall(VecRestoreArray(yy, &y));
448   PetscFunctionReturn(PETSC_SUCCESS);
449 }
450 #endif
451 
452 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
453 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
454 {
455   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
456   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
457   const PetscScalar *x;
458   PetscScalar       *y;
459   PetscObjectState   state;
460 
461   PetscFunctionBegin;
462 
463   /* If there are no nonzero entries, zero yy and return immediately. */
464   if (!a->nz) {
465     PetscCall(VecGetArray(yy, &y));
466     PetscCall(PetscArrayzero(y, A->cmap->n));
467     PetscCall(VecRestoreArray(yy, &y));
468     PetscFunctionReturn(PETSC_SUCCESS);
469   }
470 
471   PetscCall(VecGetArrayRead(xx, &x));
472   PetscCall(VecGetArray(yy, &y));
473 
474   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
475    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
476    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
477   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
478   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
479 
480   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
481   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
482 
483   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
484   PetscCall(VecRestoreArrayRead(xx, &x));
485   PetscCall(VecRestoreArray(yy, &y));
486   PetscFunctionReturn(PETSC_SUCCESS);
487 }
488 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
489 
490 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
491 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
492 {
493   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
494   const PetscScalar *x;
495   PetscScalar       *y, *z;
496   const MatScalar   *aa;
497   PetscInt           m = A->rmap->n;
498   PetscInt           n = A->cmap->n;
499   const PetscInt    *aj, *ai;
500   PetscInt           i;
501 
502   /* Variables not in MatMultAdd_SeqAIJ. */
503   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
504   PetscScalar alpha  = 1.0;
505   PetscScalar beta;
506   char        matdescra[6];
507 
508   PetscFunctionBegin;
509   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
510   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
511 
512   PetscCall(VecGetArrayRead(xx, &x));
513   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
514   aj = a->j; /* aj[k] gives column index for element aa[k]. */
515   aa = a->a; /* Nonzero elements stored row-by-row. */
516   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
517 
518   /* Call MKL sparse BLAS routine to do the MatMult. */
519   if (zz == yy) {
520     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
521     beta = 1.0;
522     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
523   } else {
524     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
525      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
526     beta = 0.0;
527     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
528     for (i = 0; i < m; i++) z[i] += y[i];
529   }
530 
531   PetscCall(PetscLogFlops(2.0 * a->nz));
532   PetscCall(VecRestoreArrayRead(xx, &x));
533   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 #endif
537 
538 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
539 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
540 {
541   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
542   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
543   const PetscScalar *x;
544   PetscScalar       *y, *z;
545   PetscInt           m = A->rmap->n;
546   PetscInt           i;
547 
548   /* Variables not in MatMultAdd_SeqAIJ. */
549   PetscObjectState state;
550 
551   PetscFunctionBegin;
552 
553   /* If there are no nonzero entries, set zz = yy and return immediately. */
554   if (!a->nz) {
555     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
556     PetscCall(PetscArraycpy(z, y, m));
557     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
558     PetscFunctionReturn(PETSC_SUCCESS);
559   }
560 
561   PetscCall(VecGetArrayRead(xx, &x));
562   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
563 
564   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
565    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
566    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
567   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
568   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
569 
570   /* Call MKL sparse BLAS routine to do the MatMult. */
571   if (zz == yy) {
572     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
573      * with alpha and beta both set to 1.0. */
574     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
575   } else {
576     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
577      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
578     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
579     for (i = 0; i < m; i++) z[i] += y[i];
580   }
581 
582   PetscCall(PetscLogFlops(2.0 * a->nz));
583   PetscCall(VecRestoreArrayRead(xx, &x));
584   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
585   PetscFunctionReturn(PETSC_SUCCESS);
586 }
587 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
588 
589 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
590 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
591 {
592   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
593   const PetscScalar *x;
594   PetscScalar       *y, *z;
595   const MatScalar   *aa;
596   PetscInt           m = A->rmap->n;
597   PetscInt           n = A->cmap->n;
598   const PetscInt    *aj, *ai;
599   PetscInt           i;
600 
601   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
602   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
603   PetscScalar alpha  = 1.0;
604   PetscScalar beta;
605   char        matdescra[6];
606 
607   PetscFunctionBegin;
608   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
609   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
610 
611   PetscCall(VecGetArrayRead(xx, &x));
612   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
613   aj = a->j; /* aj[k] gives column index for element aa[k]. */
614   aa = a->a; /* Nonzero elements stored row-by-row. */
615   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
616 
617   /* Call MKL sparse BLAS routine to do the MatMult. */
618   if (zz == yy) {
619     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
620     beta = 1.0;
621     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
622   } else {
623     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
624      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
625     beta = 0.0;
626     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
627     for (i = 0; i < n; i++) z[i] += y[i];
628   }
629 
630   PetscCall(PetscLogFlops(2.0 * a->nz));
631   PetscCall(VecRestoreArrayRead(xx, &x));
632   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 #endif
636 
637 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
638 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
639 {
640   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
641   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
642   const PetscScalar *x;
643   PetscScalar       *y, *z;
644   PetscInt           n = A->cmap->n;
645   PetscInt           i;
646   PetscObjectState   state;
647 
648   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
649 
650   PetscFunctionBegin;
651 
652   /* If there are no nonzero entries, set zz = yy and return immediately. */
653   if (!a->nz) {
654     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
655     PetscCall(PetscArraycpy(z, y, n));
656     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
657     PetscFunctionReturn(PETSC_SUCCESS);
658   }
659 
660   PetscCall(VecGetArrayRead(xx, &x));
661   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
662 
663   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
664    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
665    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
666   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
667   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
668 
669   /* Call MKL sparse BLAS routine to do the MatMult. */
670   if (zz == yy) {
671     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
672      * with alpha and beta both set to 1.0. */
673     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
674   } else {
675     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
676      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
677     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
678     for (i = 0; i < n; i++) z[i] += y[i];
679   }
680 
681   PetscCall(PetscLogFlops(2.0 * a->nz));
682   PetscCall(VecRestoreArrayRead(xx, &x));
683   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
684   PetscFunctionReturn(PETSC_SUCCESS);
685 }
686 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
687 
688 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
689 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
690 {
691   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
692   sparse_matrix_t     csrA, csrB, csrC;
693   PetscInt            nrows, ncols;
694   struct matrix_descr descr_type_gen;
695   PetscObjectState    state;
696 
697   PetscFunctionBegin;
698   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
699    * not handle sparse matrices with zero rows or columns. */
700   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
701   else nrows = A->cmap->N;
702   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
703   else ncols = B->rmap->N;
704 
705   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
706   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
707   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
708   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
709   csrA                = a->csrA;
710   csrB                = b->csrA;
711   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
712 
713   if (csrA && csrB) {
714     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
715   } else {
716     csrC = NULL;
717   }
718 
719   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
720 
721   PetscFunctionReturn(PETSC_SUCCESS);
722 }
723 
724 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
725 {
726   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
727   sparse_matrix_t     csrA, csrB, csrC;
728   struct matrix_descr descr_type_gen;
729   PetscObjectState    state;
730 
731   PetscFunctionBegin;
732   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
733   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
734   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
735   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
736   csrA                = a->csrA;
737   csrB                = b->csrA;
738   csrC                = c->csrA;
739   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
740 
741   if (csrA && csrB) {
742     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
743   } else {
744     csrC = NULL;
745   }
746 
747   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
748   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
749 
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
754 {
755   PetscFunctionBegin;
756   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
761 {
762   PetscFunctionBegin;
763   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
775 {
776   PetscFunctionBegin;
777   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
778   PetscFunctionReturn(PETSC_SUCCESS);
779 }
780 
781 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
782 {
783   PetscFunctionBegin;
784   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
785   PetscFunctionReturn(PETSC_SUCCESS);
786 }
787 
788 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
789 {
790   PetscFunctionBegin;
791   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
792   PetscFunctionReturn(PETSC_SUCCESS);
793 }
794 
795 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796 {
797   Mat_Product *product = C->product;
798   Mat          A = product->A, B = product->B;
799 
800   PetscFunctionBegin;
801   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
806 {
807   Mat_Product *product = C->product;
808   Mat          A = product->A, B = product->B;
809   PetscReal    fill = product->fill;
810 
811   PetscFunctionBegin;
812   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
813   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
814   PetscFunctionReturn(PETSC_SUCCESS);
815 }
816 
817 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
818 {
819   Mat                 Ct;
820   Vec                 zeros;
821   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
822   sparse_matrix_t     csrA, csrP, csrC;
823   PetscBool           set, flag;
824   struct matrix_descr descr_type_sym;
825   PetscObjectState    state;
826 
827   PetscFunctionBegin;
828   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
829   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
830 
831   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
832   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
833   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
834   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
835   csrA                = a->csrA;
836   csrP                = p->csrA;
837   csrC                = c->csrA;
838   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
839   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
840   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
841 
842   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
843   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
844 
845   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
846    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
847    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
848    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
849    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
850    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
851    * the full matrix. */
852   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
853   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
854   PetscCall(MatCreateVecs(C, &zeros, NULL));
855   PetscCall(VecSetFromOptions(zeros));
856   PetscCall(VecZeroEntries(zeros));
857   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
858   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
859   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
860   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
861   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
862   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
863   PetscCall(VecDestroy(&zeros));
864   PetscCall(MatDestroy(&Ct));
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
869 {
870   Mat_Product        *product = C->product;
871   Mat                 A = product->A, P = product->B;
872   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
873   sparse_matrix_t     csrA, csrP, csrC;
874   struct matrix_descr descr_type_sym;
875   PetscObjectState    state;
876 
877   PetscFunctionBegin;
878   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
879   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
880   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
881   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
882   csrA                = a->csrA;
883   csrP                = p->csrA;
884   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
885   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
886   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
887 
888   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
889   if (csrP && csrA) {
890     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
891   } else {
892     csrC = NULL;
893   }
894 
895   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
896    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
897    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
898    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
899    * is guaranteed. */
900   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
901 
902   C->ops->productnumeric = MatProductNumeric_PtAP;
903   PetscFunctionReturn(PETSC_SUCCESS);
904 }
905 
906 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
907 {
908   PetscFunctionBegin;
909   C->ops->productsymbolic = MatProductSymbolic_AB;
910   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
915 {
916   PetscFunctionBegin;
917   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
918   PetscFunctionReturn(PETSC_SUCCESS);
919 }
920 
921 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
922 {
923   PetscFunctionBegin;
924   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
925   C->ops->productsymbolic          = MatProductSymbolic_ABt;
926   PetscFunctionReturn(PETSC_SUCCESS);
927 }
928 
929 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
930 {
931   Mat_Product *product = C->product;
932   Mat          A       = product->A;
933   PetscBool    set, flag;
934 
935   PetscFunctionBegin;
936   if (PetscDefined(USE_COMPLEX)) {
937     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
938      * We do this in several other locations in this file. This works for the time being, but these
939      * routines are considered unsafe and may be removed from the MatProduct code in the future.
940      * TODO: Add proper MATSEQAIJMKL implementations */
941     C->ops->productsymbolic = NULL;
942   } else {
943     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
944     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
945     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
946     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
948      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
949   }
950   PetscFunctionReturn(PETSC_SUCCESS);
951 }
952 
953 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
954 {
955   PetscFunctionBegin;
956   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
957   PetscFunctionReturn(PETSC_SUCCESS);
958 }
959 
960 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
961 {
962   PetscFunctionBegin;
963   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
964   PetscFunctionReturn(PETSC_SUCCESS);
965 }
966 
967 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
968 {
969   Mat_Product *product = C->product;
970 
971   PetscFunctionBegin;
972   switch (product->type) {
973   case MATPRODUCT_AB:
974     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
975     break;
976   case MATPRODUCT_AtB:
977     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
978     break;
979   case MATPRODUCT_ABt:
980     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
981     break;
982   case MATPRODUCT_PtAP:
983     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
984     break;
985   case MATPRODUCT_RARt:
986     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
987     break;
988   case MATPRODUCT_ABC:
989     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
990     break;
991   default:
992     break;
993   }
994   PetscFunctionReturn(PETSC_SUCCESS);
995 }
996 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
997 
998 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
999  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1000  * routine, but can also be used to convert an assembled SeqAIJ matrix
1001  * into a SeqAIJMKL one. */
1002 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1003 {
1004   Mat            B = *newmat;
1005   Mat_SeqAIJMKL *aijmkl;
1006   PetscBool      set;
1007   PetscBool      sametype;
1008 
1009   PetscFunctionBegin;
1010   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1011 
1012   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1013   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1014 
1015   PetscCall(PetscNew(&aijmkl));
1016   B->spptr = (void *)aijmkl;
1017 
1018   /* Set function pointers for methods that we inherit from AIJ but override.
1019    * We also parse some command line options below, since those determine some of the methods we point to. */
1020   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
1021   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1022   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1023 
1024   aijmkl->sparse_optimized = PETSC_FALSE;
1025 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1026   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1027 #else
1028   aijmkl->no_SpMV2 = PETSC_TRUE;
1029 #endif
1030   aijmkl->eager_inspection = PETSC_FALSE;
1031 
1032   /* Parse command line options. */
1033   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1034   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1035   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1036   PetscOptionsEnd();
1037 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1038   if (!aijmkl->no_SpMV2) {
1039     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1040     aijmkl->no_SpMV2 = PETSC_TRUE;
1041   }
1042 #endif
1043 
1044 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1045   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1046   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1047   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1048   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1049   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1050   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1051   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1052   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1055     #if !defined(PETSC_USE_COMPLEX)
1056   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1057     #else
1058   B->ops->ptapnumeric = NULL;
1059     #endif
1060   #endif
1061 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1062 
1063 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1064   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1065    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1066    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1067    * versions in which the older interface has not been deprecated, we use the old interface. */
1068   if (aijmkl->no_SpMV2) {
1069     B->ops->mult             = MatMult_SeqAIJMKL;
1070     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1071     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1072     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1073   }
1074 #endif
1075 
1076   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1077 
1078   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1079   *newmat = B;
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 /*@C
1084    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1085 
1086    Collective
1087 
1088    Input Parameters:
1089 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
1090 .  m - number of rows
1091 .  n - number of columns
1092 .  nz - number of nonzeros per row (same for all rows)
1093 -  nnz - array containing the number of nonzeros in the various rows
1094          (possibly different for each row) or `NULL`
1095 
1096    Output Parameter:
1097 .  A - the matrix
1098 
1099    Options Database Keys:
1100 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1101 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1102                                   performing this step the first time the matrix is applied
1103 
1104    Level: intermediate
1105 
1106    Notes:
1107    If `nnz` is given then `nz` is ignored
1108 
1109    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1110    routines from Intel MKL whenever possible.
1111 
1112   If the installed version of MKL supports the "SpMV2" sparse
1113    inspector-executor routines, then those are used by default.
1114 
1115   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1116    (for symmetric A) operations are currently supported.
1117    MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1118 
1119 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1120 @*/
1121 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1122 {
1123   PetscFunctionBegin;
1124   PetscCall(MatCreate(comm, A));
1125   PetscCall(MatSetSizes(*A, m, n, m, n));
1126   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1127   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1128   PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130 
1131 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1132 {
1133   PetscFunctionBegin;
1134   PetscCall(MatSetType(A, MATSEQAIJ));
1135   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138