xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12   #define MKL_ILP64
13 #endif
14 #include <mkl_spblas.h>
15 
16 typedef struct {
17   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
18   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
19   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20   PetscObjectState state;
21 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23   struct matrix_descr descr;
24 #endif
25 } Mat_SeqAIJMKL;
26 
27 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
30 {
31   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   Mat B = *newmat;
34 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
35   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
36 #endif
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
40 
41   /* Reset the original function pointers. */
42   B->ops->duplicate               = MatDuplicate_SeqAIJ;
43   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
44   B->ops->destroy                 = MatDestroy_SeqAIJ;
45   B->ops->mult                    = MatMult_SeqAIJ;
46   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
47   B->ops->multadd                 = MatMultAdd_SeqAIJ;
48   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
49   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
50   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
51   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
55 
56   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
57 
58 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
59   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
60    * simply involves destroying the MKL sparse matrix handle and then freeing
61    * the spptr pointer. */
62   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
63 
64   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
65 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
66   PetscCall(PetscFree(B->spptr));
67 
68   /* Change the type of B to MATSEQAIJ. */
69   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
70 
71   *newmat = B;
72   PetscFunctionReturn(PETSC_SUCCESS);
73 }
74 
75 static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
76 {
77   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
78 
79   PetscFunctionBegin;
80   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
81   if (aijmkl) {
82     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
83 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
84     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
85 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
86     PetscCall(PetscFree(A->spptr));
87   }
88 
89   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
90    * to destroy everything that remains. */
91   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
92   /* I don't call MatSetType().  I believe this is because that
93    * is only to be called when *building* a matrix.  I could be wrong, but
94    * that is how things work for the SuperLU matrix class. */
95   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
96   PetscCall(MatDestroy_SeqAIJ(A));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
101  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
102  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
103  * handle, creates a new one, and then calls mkl_sparse_optimize().
104  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
105  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
106  * an unoptimized matrix handle here. */
107 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
108 {
109 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
110   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
111    * does nothing. We make it callable anyway in this case because it cuts
112    * down on littering the code with #ifdefs. */
113   PetscFunctionBegin;
114   PetscFunctionReturn(PETSC_SUCCESS);
115 #else
116   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
117   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
118   PetscInt       m, n;
119   MatScalar     *aa;
120   PetscInt      *aj, *ai;
121 
122   PetscFunctionBegin;
123   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
124   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
125    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
126    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
127    * mkl_sparse_optimize() later. */
128   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
129   #endif
130 
131   if (aijmkl->sparse_optimized) {
132     /* Matrix has been previously assembled and optimized. Must destroy old
133      * matrix handle before running the optimization step again. */
134     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
135   }
136   aijmkl->sparse_optimized = PETSC_FALSE;
137 
138   /* Now perform the SpMV2 setup and matrix optimization. */
139   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
140   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
141   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
142   m                  = A->rmap->n;
143   n                  = A->cmap->n;
144   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
145   aa                 = a->a; /* Nonzero elements stored row-by-row. */
146   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
147   if (a->nz && aa && !A->structure_only) {
148     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
149      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
150     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
151     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
152     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
153     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
154     aijmkl->sparse_optimized = PETSC_TRUE;
155     PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
156   } else {
157     aijmkl->csrA = NULL;
158   }
159   PetscFunctionReturn(PETSC_SUCCESS);
160 #endif
161 }
162 
163 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
164 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
165 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
166 {
167   sparse_index_base_t indexing;
168   PetscInt            m, n;
169   PetscInt           *aj, *ai, *dummy;
170   MatScalar          *aa;
171   Mat_SeqAIJMKL      *aijmkl;
172 
173   PetscFunctionBegin;
174   if (csrA) {
175     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
176     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
177     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
178   } else {
179     aj = ai = NULL;
180     aa      = NULL;
181   }
182 
183   PetscCall(MatSetType(A, MATSEQAIJ));
184   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
185   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
186    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
187    * they will be destroyed when the MKL handle is destroyed.
188    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
189   if (csrA) {
190     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
191   } else {
192     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
193     PetscCall(MatSetUp(A));
194     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
195     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
196   }
197 
198   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
199    * Now turn it into a MATSEQAIJMKL. */
200   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
201 
202   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
203   aijmkl->csrA = csrA;
204 
205   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
206    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
207    * and just need to be able to run the MKL optimization step. */
208   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
209   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
210   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
211   if (csrA) {
212     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
213     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
214   }
215   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
216   PetscFunctionReturn(PETSC_SUCCESS);
217 }
218 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
219 
220 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
221  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
222  * MatMatMultNumeric(). */
223 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
224 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
225 {
226   PetscInt            i;
227   PetscInt            nrows, ncols;
228   PetscInt            nz;
229   PetscInt           *ai, *aj, *dummy;
230   PetscScalar        *aa;
231   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
232   sparse_index_base_t indexing;
233 
234   PetscFunctionBegin;
235   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
236   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
237 
238   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
239   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
240 
241   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
242    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
243   for (i = 0; i < nrows; i++) {
244     nz = ai[i + 1] - ai[i];
245     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
246   }
247 
248   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
249   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
250 
251   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
252   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
253    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
254    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
255   aijmkl->sparse_optimized = PETSC_FALSE;
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
259 
260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
262 {
263   PetscInt            i, j, k;
264   PetscInt            nrows, ncols;
265   PetscInt            nz;
266   PetscInt           *ai, *aj, *dummy;
267   PetscScalar        *aa;
268   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
269   sparse_index_base_t indexing;
270 
271   PetscFunctionBegin;
272   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
273 
274   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
275   if (!aijmkl->csrA) {
276     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
277     PetscFunctionReturn(PETSC_SUCCESS);
278   }
279 
280   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
281   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
282 
283   k = 0;
284   for (i = 0; i < nrows; i++) {
285     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
286     nz = ai[i + 1] - ai[i];
287     for (j = 0; j < nz; j++) {
288       if (aa) {
289         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
290       } else {
291         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
292       }
293       k++;
294     }
295     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
296   }
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
300 
301 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
302 {
303   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
304   Mat_SeqAIJMKL *aijmkl_dest;
305 
306   PetscFunctionBegin;
307   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
308   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
309   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
310   aijmkl_dest->sparse_optimized = PETSC_FALSE;
311   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
315 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316 {
317   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
318   Mat_SeqAIJMKL *aijmkl;
319 
320   PetscFunctionBegin;
321   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
322 
323   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
324    * extra information and some different methods, call the AssemblyEnd
325    * routine for a MATSEQAIJ.
326    * I'm not sure if this is the best way to do this, but it avoids
327    * a lot of code duplication. */
328   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
329   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
330 
331   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
332    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
333   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
334   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
335   PetscFunctionReturn(PETSC_SUCCESS);
336 }
337 
338 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
339 static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
340 {
341   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
342   const PetscScalar *x;
343   PetscScalar       *y;
344   const MatScalar   *aa;
345   PetscInt           m     = A->rmap->n;
346   PetscInt           n     = A->cmap->n;
347   PetscScalar        alpha = 1.0;
348   PetscScalar        beta  = 0.0;
349   const PetscInt    *aj, *ai;
350   char               matdescra[6];
351 
352   /* Variables not in MatMult_SeqAIJ. */
353   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
354 
355   PetscFunctionBegin;
356   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
357   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
358   PetscCall(VecGetArrayRead(xx, &x));
359   PetscCall(VecGetArray(yy, &y));
360   aj = a->j; /* aj[k] gives column index for element aa[k]. */
361   aa = a->a; /* Nonzero elements stored row-by-row. */
362   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
363 
364   /* Call MKL sparse BLAS routine to do the MatMult. */
365   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
366 
367   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
368   PetscCall(VecRestoreArrayRead(xx, &x));
369   PetscCall(VecRestoreArray(yy, &y));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 #endif
373 
374 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
375 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
376 {
377   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
378   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
379   const PetscScalar *x;
380   PetscScalar       *y;
381   PetscObjectState   state;
382 
383   PetscFunctionBegin;
384   /* If there are no nonzero entries, zero yy and return immediately. */
385   if (!a->nz) {
386     PetscCall(VecGetArray(yy, &y));
387     PetscCall(PetscArrayzero(y, A->rmap->n));
388     PetscCall(VecRestoreArray(yy, &y));
389     PetscFunctionReturn(PETSC_SUCCESS);
390   }
391 
392   PetscCall(VecGetArrayRead(xx, &x));
393   PetscCall(VecGetArray(yy, &y));
394 
395   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
396    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
397    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
398   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
399   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
400 
401   /* Call MKL SpMV2 executor routine to do the MatMult. */
402   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
403 
404   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
405   PetscCall(VecRestoreArrayRead(xx, &x));
406   PetscCall(VecRestoreArray(yy, &y));
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
410 
411 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
412 static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
413 {
414   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
415   const PetscScalar *x;
416   PetscScalar       *y;
417   const MatScalar   *aa;
418   PetscInt           m     = A->rmap->n;
419   PetscInt           n     = A->cmap->n;
420   PetscScalar        alpha = 1.0;
421   PetscScalar        beta  = 0.0;
422   const PetscInt    *aj, *ai;
423   char               matdescra[6];
424 
425   /* Variables not in MatMultTranspose_SeqAIJ. */
426   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
427 
428   PetscFunctionBegin;
429   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
430   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
431   PetscCall(VecGetArrayRead(xx, &x));
432   PetscCall(VecGetArray(yy, &y));
433   aj = a->j; /* aj[k] gives column index for element aa[k]. */
434   aa = a->a; /* Nonzero elements stored row-by-row. */
435   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
436 
437   /* Call MKL sparse BLAS routine to do the MatMult. */
438   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
439 
440   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
441   PetscCall(VecRestoreArrayRead(xx, &x));
442   PetscCall(VecRestoreArray(yy, &y));
443   PetscFunctionReturn(PETSC_SUCCESS);
444 }
445 #endif
446 
447 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
448 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
449 {
450   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
451   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
452   const PetscScalar *x;
453   PetscScalar       *y;
454   PetscObjectState   state;
455 
456   PetscFunctionBegin;
457   /* If there are no nonzero entries, zero yy and return immediately. */
458   if (!a->nz) {
459     PetscCall(VecGetArray(yy, &y));
460     PetscCall(PetscArrayzero(y, A->cmap->n));
461     PetscCall(VecRestoreArray(yy, &y));
462     PetscFunctionReturn(PETSC_SUCCESS);
463   }
464 
465   PetscCall(VecGetArrayRead(xx, &x));
466   PetscCall(VecGetArray(yy, &y));
467 
468   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
469    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
470    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
471   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
472   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
473 
474   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
475   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
476 
477   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
478   PetscCall(VecRestoreArrayRead(xx, &x));
479   PetscCall(VecRestoreArray(yy, &y));
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
483 
484 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
485 static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
486 {
487   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
488   const PetscScalar *x;
489   PetscScalar       *y, *z;
490   const MatScalar   *aa;
491   PetscInt           m = A->rmap->n;
492   PetscInt           n = A->cmap->n;
493   const PetscInt    *aj, *ai;
494   PetscInt           i;
495 
496   /* Variables not in MatMultAdd_SeqAIJ. */
497   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
498   PetscScalar alpha  = 1.0;
499   PetscScalar beta;
500   char        matdescra[6];
501 
502   PetscFunctionBegin;
503   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
504   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
505 
506   PetscCall(VecGetArrayRead(xx, &x));
507   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
508   aj = a->j; /* aj[k] gives column index for element aa[k]. */
509   aa = a->a; /* Nonzero elements stored row-by-row. */
510   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
511 
512   /* Call MKL sparse BLAS routine to do the MatMult. */
513   if (zz == yy) {
514     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
515     beta = 1.0;
516     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
517   } else {
518     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
519      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
520     beta = 0.0;
521     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
522     for (i = 0; i < m; i++) z[i] += y[i];
523   }
524 
525   PetscCall(PetscLogFlops(2.0 * a->nz));
526   PetscCall(VecRestoreArrayRead(xx, &x));
527   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 #endif
531 
532 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
533 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
534 {
535   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
536   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
537   const PetscScalar *x;
538   PetscScalar       *y, *z;
539   PetscInt           m = A->rmap->n;
540   PetscInt           i;
541 
542   /* Variables not in MatMultAdd_SeqAIJ. */
543   PetscObjectState state;
544 
545   PetscFunctionBegin;
546   /* If there are no nonzero entries, set zz = yy and return immediately. */
547   if (!a->nz) {
548     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
549     PetscCall(PetscArraycpy(z, y, m));
550     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
551     PetscFunctionReturn(PETSC_SUCCESS);
552   }
553 
554   PetscCall(VecGetArrayRead(xx, &x));
555   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
556 
557   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
558    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
559    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
560   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
561   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
562 
563   /* Call MKL sparse BLAS routine to do the MatMult. */
564   if (zz == yy) {
565     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
566      * with alpha and beta both set to 1.0. */
567     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
568   } else {
569     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
570      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
571     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
572     for (i = 0; i < m; i++) z[i] += y[i];
573   }
574 
575   PetscCall(PetscLogFlops(2.0 * a->nz));
576   PetscCall(VecRestoreArrayRead(xx, &x));
577   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
578   PetscFunctionReturn(PETSC_SUCCESS);
579 }
580 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
581 
582 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
583 static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
584 {
585   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
586   const PetscScalar *x;
587   PetscScalar       *y, *z;
588   const MatScalar   *aa;
589   PetscInt           m = A->rmap->n;
590   PetscInt           n = A->cmap->n;
591   const PetscInt    *aj, *ai;
592   PetscInt           i;
593 
594   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
595   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
596   PetscScalar alpha  = 1.0;
597   PetscScalar beta;
598   char        matdescra[6];
599 
600   PetscFunctionBegin;
601   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
602   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
603 
604   PetscCall(VecGetArrayRead(xx, &x));
605   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
606   aj = a->j; /* aj[k] gives column index for element aa[k]. */
607   aa = a->a; /* Nonzero elements stored row-by-row. */
608   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
609 
610   /* Call MKL sparse BLAS routine to do the MatMult. */
611   if (zz == yy) {
612     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
613     beta = 1.0;
614     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
615   } else {
616     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
617      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
618     beta = 0.0;
619     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
620     for (i = 0; i < n; i++) z[i] += y[i];
621   }
622 
623   PetscCall(PetscLogFlops(2.0 * a->nz));
624   PetscCall(VecRestoreArrayRead(xx, &x));
625   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 #endif
629 
630 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
631 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
632 {
633   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
634   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
635   const PetscScalar *x;
636   PetscScalar       *y, *z;
637   PetscInt           n = A->cmap->n;
638   PetscInt           i;
639   PetscObjectState   state;
640 
641   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
642 
643   PetscFunctionBegin;
644   /* If there are no nonzero entries, set zz = yy and return immediately. */
645   if (!a->nz) {
646     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
647     PetscCall(PetscArraycpy(z, y, n));
648     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
649     PetscFunctionReturn(PETSC_SUCCESS);
650   }
651 
652   PetscCall(VecGetArrayRead(xx, &x));
653   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
654 
655   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
656    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
657    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
658   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
659   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
660 
661   /* Call MKL sparse BLAS routine to do the MatMult. */
662   if (zz == yy) {
663     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
664      * with alpha and beta both set to 1.0. */
665     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
666   } else {
667     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
668      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
669     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
670     for (i = 0; i < n; i++) z[i] += y[i];
671   }
672 
673   PetscCall(PetscLogFlops(2.0 * a->nz));
674   PetscCall(VecRestoreArrayRead(xx, &x));
675   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
676   PetscFunctionReturn(PETSC_SUCCESS);
677 }
678 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
679 
680 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
681 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
682 {
683   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
684   sparse_matrix_t     csrA, csrB, csrC;
685   PetscInt            nrows, ncols;
686   struct matrix_descr descr_type_gen;
687   PetscObjectState    state;
688 
689   PetscFunctionBegin;
690   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
691    * not handle sparse matrices with zero rows or columns. */
692   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
693   else nrows = A->cmap->N;
694   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
695   else ncols = B->rmap->N;
696 
697   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
698   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
699   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
700   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
701   csrA                = a->csrA;
702   csrB                = b->csrA;
703   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
704 
705   if (csrA && csrB) {
706     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
707   } else {
708     csrC = NULL;
709   }
710 
711   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
716 {
717   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
718   sparse_matrix_t     csrA, csrB, csrC;
719   struct matrix_descr descr_type_gen;
720   PetscObjectState    state;
721 
722   PetscFunctionBegin;
723   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
724   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
725   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
726   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
727   csrA                = a->csrA;
728   csrB                = b->csrA;
729   csrC                = c->csrA;
730   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
731 
732   if (csrA && csrB) {
733     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
734   } else {
735     csrC = NULL;
736   }
737 
738   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
739   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
740   PetscFunctionReturn(PETSC_SUCCESS);
741 }
742 
743 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
744 {
745   PetscFunctionBegin;
746   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
747   PetscFunctionReturn(PETSC_SUCCESS);
748 }
749 
750 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
751 {
752   PetscFunctionBegin;
753   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
754   PetscFunctionReturn(PETSC_SUCCESS);
755 }
756 
757 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
758 {
759   PetscFunctionBegin;
760   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
761   PetscFunctionReturn(PETSC_SUCCESS);
762 }
763 
764 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
765 {
766   PetscFunctionBegin;
767   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
768   PetscFunctionReturn(PETSC_SUCCESS);
769 }
770 
771 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
772 {
773   PetscFunctionBegin;
774   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
779 {
780   PetscFunctionBegin;
781   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
782   PetscFunctionReturn(PETSC_SUCCESS);
783 }
784 
785 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
786 {
787   Mat_Product *product = C->product;
788   Mat          A = product->A, B = product->B;
789 
790   PetscFunctionBegin;
791   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
792   PetscFunctionReturn(PETSC_SUCCESS);
793 }
794 
795 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796 {
797   Mat_Product *product = C->product;
798   Mat          A = product->A, B = product->B;
799   PetscReal    fill = product->fill;
800 
801   PetscFunctionBegin;
802   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
803   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
808 {
809   Mat                 Ct;
810   Vec                 zeros;
811   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
812   sparse_matrix_t     csrA, csrP, csrC;
813   PetscBool           set, flag;
814   struct matrix_descr descr_type_sym;
815   PetscObjectState    state;
816 
817   PetscFunctionBegin;
818   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
819   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
820 
821   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
822   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
823   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
824   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
825   csrA                = a->csrA;
826   csrP                = p->csrA;
827   csrC                = c->csrA;
828   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
829   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
830   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
831 
832   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
833   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
834 
835   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
836    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
837    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
838    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
839    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
840    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
841    * the full matrix. */
842   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
843   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
844   PetscCall(MatCreateVecs(C, &zeros, NULL));
845   PetscCall(VecSetFromOptions(zeros));
846   PetscCall(VecZeroEntries(zeros));
847   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
848   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
849   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
850   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
851   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
852   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
853   PetscCall(VecDestroy(&zeros));
854   PetscCall(MatDestroy(&Ct));
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
859 {
860   Mat_Product        *product = C->product;
861   Mat                 A = product->A, P = product->B;
862   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
863   sparse_matrix_t     csrA, csrP, csrC;
864   struct matrix_descr descr_type_sym;
865   PetscObjectState    state;
866 
867   PetscFunctionBegin;
868   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
869   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
870   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
871   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
872   csrA                = a->csrA;
873   csrP                = p->csrA;
874   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
875   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
876   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
877 
878   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
879   if (csrP && csrA) {
880     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
881   } else {
882     csrC = NULL;
883   }
884 
885   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
886    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
887    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
888    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
889    * is guaranteed. */
890   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
891 
892   C->ops->productnumeric = MatProductNumeric_PtAP;
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
896 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
897 {
898   PetscFunctionBegin;
899   C->ops->productsymbolic = MatProductSymbolic_AB;
900   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
901   PetscFunctionReturn(PETSC_SUCCESS);
902 }
903 
904 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
905 {
906   PetscFunctionBegin;
907   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
908   PetscFunctionReturn(PETSC_SUCCESS);
909 }
910 
911 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
912 {
913   PetscFunctionBegin;
914   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
915   C->ops->productsymbolic          = MatProductSymbolic_ABt;
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
920 {
921   Mat_Product *product = C->product;
922   Mat          A       = product->A;
923   PetscBool    set, flag;
924 
925   PetscFunctionBegin;
926   if (PetscDefined(USE_COMPLEX)) {
927     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
928      * We do this in several other locations in this file. This works for the time being, but these
929      * routines are considered unsafe and may be removed from the MatProduct code in the future.
930      * TODO: Add proper MATSEQAIJMKL implementations */
931     C->ops->productsymbolic = NULL;
932   } else {
933     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
934     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
935     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
936     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
937     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
938      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
939   }
940   PetscFunctionReturn(PETSC_SUCCESS);
941 }
942 
943 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
944 {
945   PetscFunctionBegin;
946   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947   PetscFunctionReturn(PETSC_SUCCESS);
948 }
949 
950 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
951 {
952   PetscFunctionBegin;
953   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
958 {
959   Mat_Product *product = C->product;
960 
961   PetscFunctionBegin;
962   switch (product->type) {
963   case MATPRODUCT_AB:
964     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
965     break;
966   case MATPRODUCT_AtB:
967     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
968     break;
969   case MATPRODUCT_ABt:
970     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
971     break;
972   case MATPRODUCT_PtAP:
973     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
974     break;
975   case MATPRODUCT_RARt:
976     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
977     break;
978   case MATPRODUCT_ABC:
979     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
980     break;
981   default:
982     break;
983   }
984   PetscFunctionReturn(PETSC_SUCCESS);
985 }
986 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
987 
988 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
989  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
990  * routine, but can also be used to convert an assembled SeqAIJ matrix
991  * into a SeqAIJMKL one. */
992 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
993 {
994   Mat            B = *newmat;
995   Mat_SeqAIJMKL *aijmkl;
996   PetscBool      set;
997   PetscBool      sametype;
998 
999   PetscFunctionBegin;
1000   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1001 
1002   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1003   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1004 
1005   PetscCall(PetscNew(&aijmkl));
1006   B->spptr = (void *)aijmkl;
1007 
1008   /* Set function pointers for methods that we inherit from AIJ but override.
1009    * We also parse some command line options below, since those determine some of the methods we point to. */
1010   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
1011   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1012   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1013 
1014   aijmkl->sparse_optimized = PETSC_FALSE;
1015 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1016   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1017 #else
1018   aijmkl->no_SpMV2 = PETSC_TRUE;
1019 #endif
1020   aijmkl->eager_inspection = PETSC_FALSE;
1021 
1022   /* Parse command line options. */
1023   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1024   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1025   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1026   PetscOptionsEnd();
1027 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1028   if (!aijmkl->no_SpMV2) {
1029     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1030     aijmkl->no_SpMV2 = PETSC_TRUE;
1031   }
1032 #endif
1033 
1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1036   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1037   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1038   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1039   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1040   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1041   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1042   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1043   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1044   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1045     #if !defined(PETSC_USE_COMPLEX)
1046   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1047     #else
1048   B->ops->ptapnumeric = NULL;
1049     #endif
1050   #endif
1051 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1052 
1053 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1054   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1055    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1056    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1057    * versions in which the older interface has not been deprecated, we use the old interface. */
1058   if (aijmkl->no_SpMV2) {
1059     B->ops->mult             = MatMult_SeqAIJMKL;
1060     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1061     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1062     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1063   }
1064 #endif
1065 
1066   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1067 
1068   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1069   *newmat = B;
1070   PetscFunctionReturn(PETSC_SUCCESS);
1071 }
1072 
1073 /*@C
1074   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1075 
1076   Collective
1077 
1078   Input Parameters:
1079 + comm - MPI communicator, set to `PETSC_COMM_SELF`
1080 . m    - number of rows
1081 . n    - number of columns
1082 . nz   - number of nonzeros per row (same for all rows)
1083 - nnz  - array containing the number of nonzeros in the various rows
1084          (possibly different for each row) or `NULL`
1085 
1086   Output Parameter:
1087 . A - the matrix
1088 
1089   Options Database Keys:
1090 + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
1091 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1092                                   performing this step the first time the matrix is applied
1093 
1094   Level: intermediate
1095 
1096   Notes:
1097   If `nnz` is given then `nz` is ignored
1098 
1099   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1100   routines from Intel MKL whenever possible.
1101 
1102   If the installed version of MKL supports the "SpMV2" sparse
1103   inspector-executor routines, then those are used by default.
1104 
1105   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1106   (for symmetric A) operations are currently supported.
1107   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1108 
1109 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1110 @*/
1111 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1112 {
1113   PetscFunctionBegin;
1114   PetscCall(MatCreate(comm, A));
1115   PetscCall(MatSetSizes(*A, m, n, m, n));
1116   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1117   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1118   PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120 
1121 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1122 {
1123   PetscFunctionBegin;
1124   PetscCall(MatSetType(A, MATSEQAIJ));
1125   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128