xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12   #define MKL_ILP64
13 #endif
14 #include <mkl_spblas.h>
15 
/* Private data appended (via Mat->spptr) to a MATSEQAIJ matrix to make it a MATSEQAIJMKL. */
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;            /* Object state of the Mat when the MKL handle was last built; used to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix descriptor (type, fill mode, diagonal) passed to the SpMV2 executor routines. */
#endif
} Mat_SeqAIJMKL;
26 
27 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
28 
/* Convert a MATSEQAIJMKL matrix back to its base MATSEQAIJ type: restore the
 * SeqAIJ method table, release the MKL-specific data hanging off spptr, and
 * change the type name. With MAT_INITIAL_MATRIX a duplicate is converted and
 * returned in *newmat; otherwise the conversion happens in place. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  /* With MAT_INITIAL_MATRIX the duplicate B carries its own spptr; switch to it
   * so we destroy the handle belonging to B, not the one belonging to A. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
74 
/* Destroy a MATSEQAIJMKL matrix: free the MKL handle and AIJMKL data, then
 * fall through to the base MATSEQAIJ destructor for everything else. */
static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    /* The MKL handle only exists once mkl_sparse_optimize() has run; destroy it if so. */
    if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscCall(PetscFree(A->spptr));
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
  /* I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
  PetscCall(MatDestroy_SeqAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
100 
101 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
102  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
103  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
104  * handle, creates a new one, and then calls mkl_sparse_optimize().
105  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
106  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
107  * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  PetscFunctionBegin;
  #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
  #endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  /* Mark unoptimized until the handle is successfully (re)built below. */
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m                  = A->rmap->n;
  n                  = A->cmap->n;
  aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
  aa                 = a->a; /* Nonzero elements stored row-by-row. */
  ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
    /* The handle references (does not copy) the AIJ arrays ai/aj/aa, using ai and ai+1
     * as the 4-array (rows_start/rows_end) CSR representation. */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    /* Record the object state so later calls can detect whether the handle is stale. */
    PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  } else {
    aijmkl->csrA = NULL;
  }

  PetscFunctionReturn(PETSC_SUCCESS);
#endif
}
164 
165 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
166 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
/* Take an already created but empty matrix A and set up its nonzero structure from the MKL
 * sparse matrix handle csrA, then convert A to MATSEQAIJMKL and install csrA as its handle.
 * On return A owns csrA (it will be destroyed with A). csrA may be NULL for an empty matrix. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *dummy;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
    PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    aj = ai = NULL;
    aa      = NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  /* Install the caller's handle directly; ownership transfers to A. */
  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
220 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
221 
222 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
223  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
224  * MatMatMultNumeric(). */
225 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  /* Insert the exported values row by row; INSERT_VALUES overwrites any existing entries. */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i];
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* Record the post-assembly state so the handle is not considered stale by the Mult routines. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
260 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
261 
262 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Print the contents of the MKL sparse matrix handle (row by row, as
 * (column, value) pairs) to the given ASCII viewer; used for debugging. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
{
  PetscInt            i, j, k;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *dummy;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

  /* k runs over all nonzeros in CSR order; ai[] gives the per-row counts. */
  k = 0;
  for (i = 0; i < nrows; i++) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
    nz = ai[i + 1] - ai[i];
    for (j = 0; j < nz; j++) {
      if (aa) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
      }
      k++;
    }
    PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
301 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
302 
303 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
304 {
305   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
306   Mat_SeqAIJMKL *aijmkl_dest;
307 
308   PetscFunctionBegin;
309   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
310   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
311   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
312   aijmkl_dest->sparse_optimized = PETSC_FALSE;
313   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
317 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
318 {
319   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
320   Mat_SeqAIJMKL *aijmkl;
321 
322   PetscFunctionBegin;
323   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
324 
325   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
326    * extra information and some different methods, call the AssemblyEnd
327    * routine for a MATSEQAIJ.
328    * I'm not sure if this is the best way to do this, but it avoids
329    * a lot of code duplication. */
330   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
331   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
332 
333   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
334    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
335   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
336   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
337 
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340 
341 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMult (yy = A*xx) using the classic (pre-inspector-executor) MKL sparse
 * BLAS routine mkl_xcsrmv() on the AIJ arrays directly. */
static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0; /* beta = 0 overwrites y rather than accumulating. */
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
375 #endif
376 
377 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMult (yy = A*xx) using the MKL SpMV2 inspector-executor interface
 * (mkl_sparse_x_mv); (re)builds the MKL handle lazily when it is missing or stale. */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately.
   * (MKL has no handle for an empty matrix; see MatSeqAIJMKL_create_mkl_handle().) */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->rmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult: y = 1.0*A*x + 0.0*y. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
413 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
414 
415 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMultTranspose (yy = A^T*xx) using the classic MKL sparse BLAS routine
 * mkl_xcsrmv() with transa = 't' on the AIJ arrays directly. */
static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscInt           m     = A->rmap->n;
  PetscInt           n     = A->cmap->n;
  PetscScalar        alpha = 1.0;
  PetscScalar        beta  = 0.0; /* beta = 0 overwrites y rather than accumulating. */
  const PetscInt    *aj, *ai;
  char               matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
449 #endif
450 
451 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultTranspose (yy = A^T*xx) using the MKL SpMV2 inspector-executor
 * interface; (re)builds the MKL handle lazily when it is missing or stale. */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscObjectState   state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately.
   * Note the transpose product has length A->cmap->n. */
  if (!a->nz) {
    PetscCall(VecGetArray(yy, &y));
    PetscCall(PetscArrayzero(y, A->cmap->n));
    PetscCall(VecRestoreArray(yy, &y));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose: y = 1.0*A^T*x + 0.0*y. */
  PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

  PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
487 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
488 
489 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMultAdd (zz = A*xx + yy) using the classic MKL sparse BLAS routine
 * mkl_xcsrmv(); the yy-addition is folded into beta when zz aliases yy. */
static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
535 #endif
536 
537 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultAdd (zz = A*xx + yy) using the MKL SpMV2 inspector-executor
 * interface; (re)builds the MKL handle lazily when it is missing or stale. */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = A->rmap->n;
  PetscInt           i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  PetscObjectState state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately.
   * (VecGetArrayPair() gives z == y when zz and yy alias, so the copy is a no-op then.) */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, m));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
586 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
587 
588 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
/* MatMultTransposeAdd (zz = A^T*xx + yy) using the classic MKL sparse BLAS
 * routine mkl_xcsrmv() with transa = 't'; note the result has length n = A->cmap->n. */
static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  const MatScalar   *aa;
  PetscInt           m = A->rmap->n;
  PetscInt           n = A->cmap->n;
  const PetscInt    *aj, *ai;
  PetscInt           i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha  = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  PetscFunctionBegin;
  matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));
  aj = a->j; /* aj[k] gives column index for element aa[k]. */
  aa = a->a; /* Nonzero elements stored row-by-row. */
  ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
    /* Loop bound is n (not m): the transpose product lives in the column space. */
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
634 #endif
635 
636 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* MatMultTransposeAdd (zz = A^T*xx + yy) using the MKL SpMV2 inspector-executor
 * interface; (re)builds the MKL handle lazily when it is missing or stale. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n; /* Transpose product has the length of the column space. */
  PetscInt           i;
  PetscObjectState   state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
685 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
686 
687 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
688 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
689 {
690   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
691   sparse_matrix_t     csrA, csrB, csrC;
692   PetscInt            nrows, ncols;
693   struct matrix_descr descr_type_gen;
694   PetscObjectState    state;
695 
696   PetscFunctionBegin;
697   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
698    * not handle sparse matrices with zero rows or columns. */
699   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
700   else nrows = A->cmap->N;
701   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
702   else ncols = B->rmap->N;
703 
704   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
705   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
706   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
707   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
708   csrA                = a->csrA;
709   csrB                = b->csrA;
710   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
711 
712   if (csrA && csrB) {
713     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
714   } else {
715     csrC = NULL;
716   }
717 
718   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
719 
720   PetscFunctionReturn(PETSC_SUCCESS);
721 }
722 
/* Numeric phase of C = op(A)*op(B): fills in the values of C, whose nonzero structure
 * was created by MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(). */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
{
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrB, csrC;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* Rebuild the MKL handles for A and B if they are missing or stale relative to the
   * PETSc object state (i.e., the matrix was modified after the handle was created). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)B, &state));
  if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
  csrA                = a->csrA;
  csrB                = b->csrA;
  csrC                = c->csrA; /* handle created during the symbolic stage; finalized in place below */
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  /* A NULL operand handle indicates an empty matrix, which MKL cannot represent; in that
   * case there is nothing to compute (the csrC = NULL store is not read afterwards). */
  if (csrA && csrB) {
    PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
  } else {
    csrC = NULL;
  }

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));

  PetscFunctionReturn(PETSC_SUCCESS);
}
751 
/* Symbolic phase of C = A*B (neither operand transposed). */
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  /* 'fill' is accepted for interface compatibility but not used: MKL determines the structure itself. */
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
758 
/* Numeric phase of C = A*B (neither operand transposed). */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
765 
/* Numeric phase of C = A^T*B. */
PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
772 
/* Symbolic phase of C = A^T*B. */
PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  /* 'fill' is accepted for interface compatibility but not used: MKL determines the structure itself. */
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
779 
/* Symbolic phase of C = A*B^T. */
PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  /* 'fill' is accepted for interface compatibility but not used: MKL determines the structure itself. */
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
786 
/* Numeric phase of C = A*B^T. */
PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
793 
/* MatProduct numeric callback for MATPRODUCT_AtB: unwraps the Mat_Product attached to C
 * and performs the numeric C = A^T*B with the MKL routine. */
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
803 
/* MatProduct symbolic callback for MATPRODUCT_AtB: performs the symbolic C = A^T*B and
 * installs the matching numeric callback on C. */
static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;
  PetscReal    fill = product->fill;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
  C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
815 
/* Numeric phase of C = P^T*A*P for real matrices A known to be symmetric, using MKL's
 * optimized mkl_sparse_sypr() routine. A must have been marked symmetric via MatSetOption(). */
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
{
  Mat                 Ct;
  Vec                 zeros;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  PetscCall(MatIsSymmetricKnown(A, &set, &flag));
  PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  /* Rebuild the MKL handles for A and P if they are missing or stale w.r.t. the PETSc object state. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA                = a->csrA;
  csrP                = p->csrA;
  csrC                = c->csrA; /* handle created by the symbolic stage; finalized in place below */
  /* Describe A as symmetric with (arbitrarily) the upper triangle stored and a non-unit diagonal. */
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.)
   * NOTE(review): unlike the symbolic routine, csrP/csrA are not guarded against NULL (empty matrix)
   * handles here -- confirm that empty operands cannot reach this point. */
  PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY()
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
  PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
  /* Zero the diagonal of the transpose so the diagonal entries are not added twice by the MatAXPY() below. */
  PetscCall(MatCreateVecs(C, &zeros, NULL));
  PetscCall(VecSetFromOptions(zeros));
  PetscCall(VecZeroEntries(zeros));
  PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
  PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  PetscCall(MatProductCreateWithMat(A, P, NULL, C));
  PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
  PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
  PetscCall(VecDestroy(&zeros));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}
866 
/* Symbolic phase of C = P^T*A*P for real symmetric A: creates the nonzero structure of C
 * via the no-value stage of mkl_sparse_sypr(), then installs the numeric callback. */
PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
{
  Mat_Product        *product = C->product;
  Mat                 A = product->A, P = product->B;
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* (Re)build the MKL handles for A and P if they are missing or stale w.r.t. the PETSc object state. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA                = a->csrA;
  csrP                = p->csrA;
  /* Describe A as symmetric with (arbitrarily) the upper triangle stored and a non-unit diagonal. */
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.)
   * A NULL handle indicates an empty operand, which MKL cannot represent. */
  if (csrP && csrA) {
    PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
  } else {
    csrC = NULL;
  }

  /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
   * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
   * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
   * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
   * is guaranteed. */
  PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

  C->ops->productnumeric = MatProductNumeric_PtAP;
  PetscFunctionReturn(PETSC_SUCCESS);
}
904 
/* Configure C for MATPRODUCT_AB: the generic symbolic driver is used, pointed at the
 * MKL-specific matmult symbolic routine. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
912 
/* Configure C for MATPRODUCT_AtB: use the dedicated MKL symbolic routine, which in turn
 * installs the matching numeric routine. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
919 
/* Configure C for MATPRODUCT_ABt. The symbolic phase uses the plain SeqAIJ implementation;
 * the MKL numeric routine is installed separately in MatConvert_SeqAIJ_SeqAIJMKL(). */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
927 
928 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
929 {
930   Mat_Product *product = C->product;
931   Mat          A       = product->A;
932   PetscBool    set, flag;
933 
934   PetscFunctionBegin;
935   if (PetscDefined(USE_COMPLEX)) {
936     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
937      * We do this in several other locations in this file. This works for the time being, but these
938      * routines are considered unsafe and may be removed from the MatProduct code in the future.
939      * TODO: Add proper MATSEQAIJMKL implementations */
940     C->ops->productsymbolic = NULL;
941   } else {
942     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
943     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
944     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
945     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
946     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
947      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
948   }
949   PetscFunctionReturn(PETSC_SUCCESS);
950 }
951 
/* Configure C for MATPRODUCT_RARt: no MKL implementation exists, so leaving the symbolic
 * pointer NULL makes MatProductSymbolic_Unsafe() be used. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
958 
/* Configure C for MATPRODUCT_ABC: no MKL implementation exists, so leaving the symbolic
 * pointer NULL makes MatProductSymbolic_Unsafe() be used. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
965 
966 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
967 {
968   Mat_Product *product = C->product;
969 
970   PetscFunctionBegin;
971   switch (product->type) {
972   case MATPRODUCT_AB:
973     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
974     break;
975   case MATPRODUCT_AtB:
976     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
977     break;
978   case MATPRODUCT_ABt:
979     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
980     break;
981   case MATPRODUCT_PtAP:
982     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
983     break;
984   case MATPRODUCT_RARt:
985     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
986     break;
987   case MATPRODUCT_ABC:
988     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
989     break;
990   default:
991     break;
992   }
993   PetscFunctionReturn(PETSC_SUCCESS);
994 }
995 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
996 
997 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
998  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
999  * routine, but can also be used to convert an assembled SeqAIJ matrix
1000  * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Nothing to do if A is already of the target type. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  /* Attach the AIJMKL-specific data to the matrix. */
  PetscCall(PetscNew(&aijmkl));
  B->spptr = (void *)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. The 'set' flag returned by PetscOptionsBool() is not consulted;
   * the defaults assigned above are simply overwritten when an option is given. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
  PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Without mkl_sparse_optimize() in the MKL library, a user request for SpMV2 cannot be honored. */
  if (!aijmkl->no_SpMV2) {
    PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Install the SpMV2 (inspector-executor) versions of the core multiply kernels. */
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
    #if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    #else
  B->ops->ptapnumeric = NULL; /* the mkl_sparse_sypr()-based routine won't work for complex matrices */
    #endif
  #endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  /* Register the routine that converts a MATSEQAIJMKL matrix back to plain MATSEQAIJ. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1081 
1082 /*@C
1083   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1084 
1085   Collective
1086 
1087   Input Parameters:
1088 + comm - MPI communicator, set to `PETSC_COMM_SELF`
1089 . m    - number of rows
1090 . n    - number of columns
1091 . nz   - number of nonzeros per row (same for all rows)
1092 - nnz  - array containing the number of nonzeros in the various rows
1093          (possibly different for each row) or `NULL`
1094 
1095   Output Parameter:
1096 . A - the matrix
1097 
1098   Options Database Keys:
1099 + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
1100 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1101                                   performing this step the first time the matrix is applied
1102 
1103   Level: intermediate
1104 
1105   Notes:
1106   If `nnz` is given then `nz` is ignored
1107 
1108   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1109   routines from Intel MKL whenever possible.
1110 
1111   If the installed version of MKL supports the "SpMV2" sparse
1112   inspector-executor routines, then those are used by default.
1113 
1114   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1115   (for symmetric A) operations are currently supported.
1116   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1117 
1118 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1119 @*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n)); /* sequential matrix: local sizes equal global sizes */
  PetscCall(MatSetType(*A, MATSEQAIJMKL));
  /* The SeqAIJ preallocation routine is used directly since MATSEQAIJMKL retains the AIJ storage. */
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1129 
/* Registered constructor for MATSEQAIJMKL: build the matrix as MATSEQAIJ first, then
 * convert it in place to the MKL-accelerated type. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1137