xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision c748140286bc7db2f79d09b77ad2f995cbbeb47c)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   Mat B = *newmat;
32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34 #endif
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
38 
39   /* Reset the original function pointers. */
40   B->ops->duplicate               = MatDuplicate_SeqAIJ;
41   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
42   B->ops->destroy                 = MatDestroy_SeqAIJ;
43   B->ops->mult                    = MatMult_SeqAIJ;
44   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
45   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53 
54   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
55 
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58    * simply involves destroying the MKL sparse matrix handle and then freeing
59    * the spptr pointer. */
60   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
61 
62   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64   PetscCall(PetscFree(B->spptr));
65 
66   /* Change the type of B to MATSEQAIJ. */
67   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
68 
69   *newmat = B;
70   PetscFunctionReturn(0);
71 }
72 
73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
76 
77   PetscFunctionBegin;
78 
79   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80   if (aijmkl) {
81     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
83     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
85     PetscCall(PetscFree(A->spptr));
86   }
87 
88   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
89    * to destroy everything that remains. */
90   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
91   /* Note that I don't call MatSetType().  I believe this is because that
92    * is only to be called when *building* a matrix.  I could be wrong, but
93    * that is how things work for the SuperLU matrix class. */
94   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
95   PetscCall(MatDestroy_SeqAIJ(A));
96   PetscFunctionReturn(0);
97 }
98 
99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
100  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
101  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
102  * handle, creates a new one, and then calls mkl_sparse_optimize().
103  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
104  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
105  * an unoptimized matrix handle here. */
106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
107 {
108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
109   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
110    * does nothing. We make it callable anyway in this case because it cuts
111    * down on littering the code with #ifdefs. */
112   PetscFunctionBegin;
113   PetscFunctionReturn(0);
114 #else
115   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
116   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
117   PetscInt       m, n;
118   MatScalar     *aa;
119   PetscInt      *aj, *ai;
120 
121   PetscFunctionBegin;
122   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
123   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
124    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
125    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
126    * mkl_sparse_optimize() later. */
127   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
128   #endif
129 
130   if (aijmkl->sparse_optimized) {
131     /* Matrix has been previously assembled and optimized. Must destroy old
132      * matrix handle before running the optimization step again. */
133     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
134   }
135   aijmkl->sparse_optimized = PETSC_FALSE;
136 
137   /* Now perform the SpMV2 setup and matrix optimization. */
138   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
139   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
140   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
141   m                  = A->rmap->n;
142   n                  = A->cmap->n;
143   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
144   aa                 = a->a; /* Nonzero elements stored row-by-row. */
145   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
146   if (a->nz && aa && !A->structure_only) {
147     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
148      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
149     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, m, n, ai, ai + 1, aj, aa);
150     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
151     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
152     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
153     aijmkl->sparse_optimized = PETSC_TRUE;
154     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
155   } else {
156     aijmkl->csrA = PETSC_NULL;
157   }
158 
159   PetscFunctionReturn(0);
160 #endif
161 }
162 
163 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
164 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
165 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
166 {
167   sparse_index_base_t indexing;
168   PetscInt            m, n;
169   PetscInt           *aj, *ai, *dummy;
170   MatScalar          *aa;
171   Mat_SeqAIJMKL      *aijmkl;
172 
173   PetscFunctionBegin;
174   if (csrA) {
175     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
176     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, &m, &n, &ai, &dummy, &aj, &aa);
177     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
178   } else {
179     aj = ai = PETSC_NULL;
180     aa      = PETSC_NULL;
181   }
182 
183   PetscCall(MatSetType(A, MATSEQAIJ));
184   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
185   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
186    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
187    * they will be destroyed when the MKL handle is destroyed.
188    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
189   if (csrA) {
190     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
191   } else {
192     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
193     PetscCall(MatSetUp(A));
194     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
195     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
196   }
197 
198   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
199    * Now turn it into a MATSEQAIJMKL. */
200   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
201 
202   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
203   aijmkl->csrA = csrA;
204 
205   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
206    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
207    * and just need to be able to run the MKL optimization step. */
208   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
209   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
210   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
211   if (csrA) {
212     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
213     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
214   }
215   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
216   PetscFunctionReturn(0);
217 }
218 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
219 
220 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
221  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
222  * MatMatMultNumeric(). */
223 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
224 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
225 {
226   PetscInt            i;
227   PetscInt            nrows, ncols;
228   PetscInt            nz;
229   PetscInt           *ai, *aj, *dummy;
230   PetscScalar        *aa;
231   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
232   sparse_index_base_t indexing;
233 
234   PetscFunctionBegin;
235   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
236   if (!aijmkl->csrA) PetscFunctionReturn(0);
237 
238   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
239   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
240 
241   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
242    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
243   for (i = 0; i < nrows; i++) {
244     nz = ai[i + 1] - ai[i];
245     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
246   }
247 
248   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
249   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
250 
251   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
252   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
253    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
254    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
255   aijmkl->sparse_optimized = PETSC_FALSE;
256   PetscFunctionReturn(0);
257 }
258 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
259 
260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
262 {
263   PetscInt            i, j, k;
264   PetscInt            nrows, ncols;
265   PetscInt            nz;
266   PetscInt           *ai, *aj, *dummy;
267   PetscScalar        *aa;
268   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
269   sparse_index_base_t indexing;
270 
271   PetscFunctionBegin;
272   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
273 
274   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
275   if (!aijmkl->csrA) {
276     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
277     PetscFunctionReturn(0);
278   }
279 
280   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
281   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, &nrows, &ncols, &ai, &dummy, &aj, &aa);
282 
283   k = 0;
284   for (i = 0; i < nrows; i++) {
285     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
286     nz = ai[i + 1] - ai[i];
287     for (j = 0; j < nz; j++) {
288       if (aa) {
289         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
290       } else {
291         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
292       }
293       k++;
294     }
295     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
296   }
297   PetscFunctionReturn(0);
298 }
299 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
300 
301 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
302 {
303   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
304   Mat_SeqAIJMKL *aijmkl_dest;
305 
306   PetscFunctionBegin;
307   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
308   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
309   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
310   aijmkl_dest->sparse_optimized = PETSC_FALSE;
311   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
312   PetscFunctionReturn(0);
313 }
314 
315 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316 {
317   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
318   Mat_SeqAIJMKL *aijmkl;
319 
320   PetscFunctionBegin;
321   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
322 
323   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
324    * extra information and some different methods, call the AssemblyEnd
325    * routine for a MATSEQAIJ.
326    * I'm not sure if this is the best way to do this, but it avoids
327    * a lot of code duplication. */
328   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
329   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
330 
331   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
332    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
333   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
334   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
335 
336   PetscFunctionReturn(0);
337 }
338 
339 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
340 PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
341 {
342   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
343   const PetscScalar *x;
344   PetscScalar       *y;
345   const MatScalar   *aa;
346   PetscInt           m     = A->rmap->n;
347   PetscInt           n     = A->cmap->n;
348   PetscScalar        alpha = 1.0;
349   PetscScalar        beta  = 0.0;
350   const PetscInt    *aj, *ai;
351   char               matdescra[6];
352 
353   /* Variables not in MatMult_SeqAIJ. */
354   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
355 
356   PetscFunctionBegin;
357   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
358   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
359   PetscCall(VecGetArrayRead(xx, &x));
360   PetscCall(VecGetArray(yy, &y));
361   aj = a->j; /* aj[k] gives column index for element aa[k]. */
362   aa = a->a; /* Nonzero elements stored row-by-row. */
363   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
364 
365   /* Call MKL sparse BLAS routine to do the MatMult. */
366   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
367 
368   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
369   PetscCall(VecRestoreArrayRead(xx, &x));
370   PetscCall(VecRestoreArray(yy, &y));
371   PetscFunctionReturn(0);
372 }
373 #endif
374 
375 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
376 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
377 {
378   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
379   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
380   const PetscScalar *x;
381   PetscScalar       *y;
382   PetscObjectState   state;
383 
384   PetscFunctionBegin;
385 
386   /* If there are no nonzero entries, zero yy and return immediately. */
387   if (!a->nz) {
388     PetscCall(VecGetArray(yy, &y));
389     PetscCall(PetscArrayzero(y, A->rmap->n));
390     PetscCall(VecRestoreArray(yy, &y));
391     PetscFunctionReturn(0);
392   }
393 
394   PetscCall(VecGetArrayRead(xx, &x));
395   PetscCall(VecGetArray(yy, &y));
396 
397   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
398    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
399    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
400   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
401   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
402 
403   /* Call MKL SpMV2 executor routine to do the MatMult. */
404   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
405 
406   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
407   PetscCall(VecRestoreArrayRead(xx, &x));
408   PetscCall(VecRestoreArray(yy, &y));
409   PetscFunctionReturn(0);
410 }
411 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
412 
413 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
414 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
415 {
416   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
417   const PetscScalar *x;
418   PetscScalar       *y;
419   const MatScalar   *aa;
420   PetscInt           m     = A->rmap->n;
421   PetscInt           n     = A->cmap->n;
422   PetscScalar        alpha = 1.0;
423   PetscScalar        beta  = 0.0;
424   const PetscInt    *aj, *ai;
425   char               matdescra[6];
426 
427   /* Variables not in MatMultTranspose_SeqAIJ. */
428   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
429 
430   PetscFunctionBegin;
431   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
432   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
433   PetscCall(VecGetArrayRead(xx, &x));
434   PetscCall(VecGetArray(yy, &y));
435   aj = a->j; /* aj[k] gives column index for element aa[k]. */
436   aa = a->a; /* Nonzero elements stored row-by-row. */
437   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
438 
439   /* Call MKL sparse BLAS routine to do the MatMult. */
440   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
441 
442   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
443   PetscCall(VecRestoreArrayRead(xx, &x));
444   PetscCall(VecRestoreArray(yy, &y));
445   PetscFunctionReturn(0);
446 }
447 #endif
448 
449 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
450 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
451 {
452   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
453   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
454   const PetscScalar *x;
455   PetscScalar       *y;
456   PetscObjectState   state;
457 
458   PetscFunctionBegin;
459 
460   /* If there are no nonzero entries, zero yy and return immediately. */
461   if (!a->nz) {
462     PetscCall(VecGetArray(yy, &y));
463     PetscCall(PetscArrayzero(y, A->cmap->n));
464     PetscCall(VecRestoreArray(yy, &y));
465     PetscFunctionReturn(0);
466   }
467 
468   PetscCall(VecGetArrayRead(xx, &x));
469   PetscCall(VecGetArray(yy, &y));
470 
471   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
472    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
473    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
474   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
475   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
476 
477   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
478   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
479 
480   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
481   PetscCall(VecRestoreArrayRead(xx, &x));
482   PetscCall(VecRestoreArray(yy, &y));
483   PetscFunctionReturn(0);
484 }
485 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
486 
487 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
488 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
489 {
490   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
491   const PetscScalar *x;
492   PetscScalar       *y, *z;
493   const MatScalar   *aa;
494   PetscInt           m = A->rmap->n;
495   PetscInt           n = A->cmap->n;
496   const PetscInt    *aj, *ai;
497   PetscInt           i;
498 
499   /* Variables not in MatMultAdd_SeqAIJ. */
500   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
501   PetscScalar alpha  = 1.0;
502   PetscScalar beta;
503   char        matdescra[6];
504 
505   PetscFunctionBegin;
506   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
507   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
508 
509   PetscCall(VecGetArrayRead(xx, &x));
510   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
511   aj = a->j; /* aj[k] gives column index for element aa[k]. */
512   aa = a->a; /* Nonzero elements stored row-by-row. */
513   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
514 
515   /* Call MKL sparse BLAS routine to do the MatMult. */
516   if (zz == yy) {
517     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
518     beta = 1.0;
519     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520   } else {
521     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
522      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
523     beta = 0.0;
524     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
525     for (i = 0; i < m; i++) z[i] += y[i];
526   }
527 
528   PetscCall(PetscLogFlops(2.0 * a->nz));
529   PetscCall(VecRestoreArrayRead(xx, &x));
530   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
531   PetscFunctionReturn(0);
532 }
533 #endif
534 
535 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
536 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
537 {
538   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
539   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
540   const PetscScalar *x;
541   PetscScalar       *y, *z;
542   PetscInt           m = A->rmap->n;
543   PetscInt           i;
544 
545   /* Variables not in MatMultAdd_SeqAIJ. */
546   PetscObjectState state;
547 
548   PetscFunctionBegin;
549 
550   /* If there are no nonzero entries, set zz = yy and return immediately. */
551   if (!a->nz) {
552     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
553     PetscCall(PetscArraycpy(z, y, m));
554     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
555     PetscFunctionReturn(0);
556   }
557 
558   PetscCall(VecGetArrayRead(xx, &x));
559   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
560 
561   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
562    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
563    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
564   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
565   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
566 
567   /* Call MKL sparse BLAS routine to do the MatMult. */
568   if (zz == yy) {
569     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
570      * with alpha and beta both set to 1.0. */
571     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
572   } else {
573     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
574      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
575     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
576     for (i = 0; i < m; i++) z[i] += y[i];
577   }
578 
579   PetscCall(PetscLogFlops(2.0 * a->nz));
580   PetscCall(VecRestoreArrayRead(xx, &x));
581   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
582   PetscFunctionReturn(0);
583 }
584 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
585 
586 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
587 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
588 {
589   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
590   const PetscScalar *x;
591   PetscScalar       *y, *z;
592   const MatScalar   *aa;
593   PetscInt           m = A->rmap->n;
594   PetscInt           n = A->cmap->n;
595   const PetscInt    *aj, *ai;
596   PetscInt           i;
597 
598   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
599   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
600   PetscScalar alpha  = 1.0;
601   PetscScalar beta;
602   char        matdescra[6];
603 
604   PetscFunctionBegin;
605   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
606   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
607 
608   PetscCall(VecGetArrayRead(xx, &x));
609   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
610   aj = a->j; /* aj[k] gives column index for element aa[k]. */
611   aa = a->a; /* Nonzero elements stored row-by-row. */
612   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
613 
614   /* Call MKL sparse BLAS routine to do the MatMult. */
615   if (zz == yy) {
616     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
617     beta = 1.0;
618     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
619   } else {
620     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
621      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
622     beta = 0.0;
623     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
624     for (i = 0; i < n; i++) z[i] += y[i];
625   }
626 
627   PetscCall(PetscLogFlops(2.0 * a->nz));
628   PetscCall(VecRestoreArrayRead(xx, &x));
629   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
630   PetscFunctionReturn(0);
631 }
632 #endif
633 
634 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
635 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
636 {
637   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
638   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
639   const PetscScalar *x;
640   PetscScalar       *y, *z;
641   PetscInt           n = A->cmap->n;
642   PetscInt           i;
643   PetscObjectState   state;
644 
645   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
646 
647   PetscFunctionBegin;
648 
649   /* If there are no nonzero entries, set zz = yy and return immediately. */
650   if (!a->nz) {
651     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
652     PetscCall(PetscArraycpy(z, y, n));
653     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
654     PetscFunctionReturn(0);
655   }
656 
657   PetscCall(VecGetArrayRead(xx, &x));
658   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
659 
660   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
661    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
662    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
663   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
664   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
665 
666   /* Call MKL sparse BLAS routine to do the MatMult. */
667   if (zz == yy) {
668     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
669      * with alpha and beta both set to 1.0. */
670     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
671   } else {
672     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
673      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
674     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
675     for (i = 0; i < n; i++) z[i] += y[i];
676   }
677 
678   PetscCall(PetscLogFlops(2.0 * a->nz));
679   PetscCall(VecRestoreArrayRead(xx, &x));
680   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
681   PetscFunctionReturn(0);
682 }
683 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
684 
685 /* -------------------------- MatProduct code -------------------------- */
686 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
687 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
688 {
689   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
690   sparse_matrix_t     csrA, csrB, csrC;
691   PetscInt            nrows, ncols;
692   struct matrix_descr descr_type_gen;
693   PetscObjectState    state;
694 
695   PetscFunctionBegin;
696   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
697    * not handle sparse matrices with zero rows or columns. */
698   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
699   else nrows = A->cmap->N;
700   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
701   else ncols = B->rmap->N;
702 
703   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
704   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
705   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
706   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
707   csrA                = a->csrA;
708   csrB                = b->csrA;
709   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
710 
711   if (csrA && csrB) {
712     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
713   } else {
714     csrC = PETSC_NULL;
715   }
716 
717   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
718 
719   PetscFunctionReturn(0);
720 }
721 
722 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
723 {
724   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
725   sparse_matrix_t     csrA, csrB, csrC;
726   struct matrix_descr descr_type_gen;
727   PetscObjectState    state;
728 
729   PetscFunctionBegin;
730   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
731   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
732   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
733   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
734   csrA                = a->csrA;
735   csrB                = b->csrA;
736   csrC                = c->csrA;
737   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
738 
739   if (csrA && csrB) {
740     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
741   } else {
742     csrC = PETSC_NULL;
743   }
744 
745   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
746   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
747 
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
752 {
753   PetscFunctionBegin;
754   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
755   PetscFunctionReturn(0);
756 }
757 
758 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
759 {
760   PetscFunctionBegin;
761   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
762   PetscFunctionReturn(0);
763 }
764 
765 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
766 {
767   PetscFunctionBegin;
768   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
773 {
774   PetscFunctionBegin;
775   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
776   PetscFunctionReturn(0);
777 }
778 
779 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
780 {
781   PetscFunctionBegin;
782   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
783   PetscFunctionReturn(0);
784 }
785 
786 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
787 {
788   PetscFunctionBegin;
789   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
790   PetscFunctionReturn(0);
791 }
792 
793 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794 {
795   Mat_Product *product = C->product;
796   Mat          A = product->A, B = product->B;
797 
798   PetscFunctionBegin;
799   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
800   PetscFunctionReturn(0);
801 }
802 
803 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
804 {
805   Mat_Product *product = C->product;
806   Mat          A = product->A, B = product->B;
807   PetscReal    fill = product->fill;
808 
809   PetscFunctionBegin;
810   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
811   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
812   PetscFunctionReturn(0);
813 }
814 
815 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
816 {
817   Mat                 Ct;
818   Vec                 zeros;
819   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
820   sparse_matrix_t     csrA, csrP, csrC;
821   PetscBool           set, flag;
822   struct matrix_descr descr_type_sym;
823   PetscObjectState    state;
824 
825   PetscFunctionBegin;
826   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
827   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
828 
829   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
830   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
831   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
832   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
833   csrA                = a->csrA;
834   csrP                = p->csrA;
835   csrC                = c->csrA;
836   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
837   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
838   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
839 
840   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
841   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
842 
843   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
844    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
845    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
846    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
847    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
848    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
849    * the full matrix. */
850   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
851   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
852   PetscCall(MatCreateVecs(C, &zeros, NULL));
853   PetscCall(VecSetFromOptions(zeros));
854   PetscCall(VecZeroEntries(zeros));
855   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
856   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
857   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
858   PetscCall(MatProductCreateWithMat(A, P, PETSC_NULL, C));
859   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
860   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
861   PetscCall(VecDestroy(&zeros));
862   PetscCall(MatDestroy(&Ct));
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
867 {
868   Mat_Product        *product = C->product;
869   Mat                 A = product->A, P = product->B;
870   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
871   sparse_matrix_t     csrA, csrP, csrC;
872   struct matrix_descr descr_type_sym;
873   PetscObjectState    state;
874 
875   PetscFunctionBegin;
876   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
877   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
878   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
879   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
880   csrA                = a->csrA;
881   csrP                = p->csrA;
882   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
883   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
884   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
885 
886   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
887   if (csrP && csrA) {
888     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
889   } else {
890     csrC = PETSC_NULL;
891   }
892 
893   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
894    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
895    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
896    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
897    * is guaranteed. */
898   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
899 
900   C->ops->productnumeric = MatProductNumeric_PtAP;
901   PetscFunctionReturn(0);
902 }
903 
904 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
905 {
906   PetscFunctionBegin;
907   C->ops->productsymbolic = MatProductSymbolic_AB;
908   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
909   PetscFunctionReturn(0);
910 }
911 
912 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
913 {
914   PetscFunctionBegin;
915   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
920 {
921   PetscFunctionBegin;
922   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
923   C->ops->productsymbolic          = MatProductSymbolic_ABt;
924   PetscFunctionReturn(0);
925 }
926 
927 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
928 {
929   Mat_Product *product = C->product;
930   Mat          A       = product->A;
931   PetscBool    set, flag;
932 
933   PetscFunctionBegin;
934   if (PetscDefined(USE_COMPLEX)) {
935     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
936      * We do this in several other locations in this file. This works for the time being, but these
937      * routines are considered unsafe and may be removed from the MatProduct code in the future.
938      * TODO: Add proper MATSEQAIJMKL implementations */
939     C->ops->productsymbolic = NULL;
940   } else {
941     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
942     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
943     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
944     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
945     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
946      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
952 {
953   PetscFunctionBegin;
954   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
955   PetscFunctionReturn(0);
956 }
957 
958 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
959 {
960   PetscFunctionBegin;
961   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
962   PetscFunctionReturn(0);
963 }
964 
965 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
966 {
967   Mat_Product *product = C->product;
968 
969   PetscFunctionBegin;
970   switch (product->type) {
971   case MATPRODUCT_AB:
972     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
973     break;
974   case MATPRODUCT_AtB:
975     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
976     break;
977   case MATPRODUCT_ABt:
978     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
979     break;
980   case MATPRODUCT_PtAP:
981     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
982     break;
983   case MATPRODUCT_RARt:
984     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
985     break;
986   case MATPRODUCT_ABC:
987     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
988     break;
989   default:
990     break;
991   }
992   PetscFunctionReturn(0);
993 }
994 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
995 /* ------------------------ End MatProduct code ------------------------ */
996 
997 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
998  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
999  * routine, but can also be used to convert an assembled SeqAIJ matrix
1000  * into a SeqAIJMKL one. */
1001 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1002 {
1003   Mat            B = *newmat;
1004   Mat_SeqAIJMKL *aijmkl;
1005   PetscBool      set;
1006   PetscBool      sametype;
1007 
1008   PetscFunctionBegin;
1009   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1010 
1011   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1012   if (sametype) PetscFunctionReturn(0);
1013 
1014   PetscCall(PetscNew(&aijmkl));
1015   B->spptr = (void *)aijmkl;
1016 
1017   /* Set function pointers for methods that we inherit from AIJ but override.
1018    * We also parse some command line options below, since those determine some of the methods we point to. */
1019   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
1020   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1021   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1022 
1023   aijmkl->sparse_optimized = PETSC_FALSE;
1024 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1025   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1026 #else
1027   aijmkl->no_SpMV2 = PETSC_TRUE;
1028 #endif
1029   aijmkl->eager_inspection = PETSC_FALSE;
1030 
1031   /* Parse command line options. */
1032   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1033   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1034   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1035   PetscOptionsEnd();
1036 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1037   if (!aijmkl->no_SpMV2) {
1038     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1039     aijmkl->no_SpMV2 = PETSC_TRUE;
1040   }
1041 #endif
1042 
1043 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1044   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1045   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1046   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1047   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1048   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1049   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1050   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1051   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1052   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054     #if !defined(PETSC_USE_COMPLEX)
1055   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1056     #else
1057   B->ops->ptapnumeric = NULL;
1058     #endif
1059   #endif
1060 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1061 
1062 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1063   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1064    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1065    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1066    * versions in which the older interface has not been deprecated, we use the old interface. */
1067   if (aijmkl->no_SpMV2) {
1068     B->ops->mult             = MatMult_SeqAIJMKL;
1069     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1070     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1071     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1072   }
1073 #endif
1074 
1075   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1076 
1077   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1078   *newmat = B;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 /*@C
1083    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1084    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1085    routines from Intel MKL whenever possible.
1086    If the installed version of MKL supports the "SpMV2" sparse
1087    inspector-executor routines, then those are used by default.
1088    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1089    (for symmetric A) operations are currently supported.
1090    Note that MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1091 
1092    Collective
1093 
1094    Input Parameters:
1095 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
1096 .  m - number of rows
1097 .  n - number of columns
1098 .  nz - number of nonzeros per row (same for all rows)
1099 -  nnz - array containing the number of nonzeros in the various rows
1100          (possibly different for each row) or NULL
1101 
1102    Output Parameter:
1103 .  A - the matrix
1104 
1105    Options Database Keys:
1106 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1107 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1108 
1109    Note:
1110    If nnz is given then nz is ignored
1111 
1112    Level: intermediate
1113 
1114 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1115 @*/
1116 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1117 {
1118   PetscFunctionBegin;
1119   PetscCall(MatCreate(comm, A));
1120   PetscCall(MatSetSizes(*A, m, n, m, n));
1121   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1122   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1127 {
1128   PetscFunctionBegin;
1129   PetscCall(MatSetType(A, MATSEQAIJ));
1130   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1131   PetscFunctionReturn(0);
1132 }
1133