xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 41417bb58ccc2dfdcf1f98b94d1e30a9a42a4648)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   Mat            B       = *newmat;
32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
34 #endif
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
38 
39   /* Reset the original function pointers. */
40   B->ops->duplicate               = MatDuplicate_SeqAIJ;
41   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
42   B->ops->destroy                 = MatDestroy_SeqAIJ;
43   B->ops->mult                    = MatMult_SeqAIJ;
44   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
45   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53 
54   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL));
55 
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58    * simply involves destroying the MKL sparse matrix handle and then freeing
59    * the spptr pointer. */
60   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61 
62   if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64   PetscCall(PetscFree(B->spptr));
65 
66   /* Change the type of B to MATSEQAIJ. */
67   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
68 
69   *newmat = B;
70   PetscFunctionReturn(0);
71 }
72 
73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
76 
77   PetscFunctionBegin;
78 
79   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80   if (aijmkl) {
81     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
83     if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
85     PetscCall(PetscFree(A->spptr));
86   }
87 
88   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
89    * to destroy everything that remains. */
90   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
91   /* Note that I don't call MatSetType().  I believe this is because that
92    * is only to be called when *building* a matrix.  I could be wrong, but
93    * that is how things work for the SuperLU matrix class. */
94   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL));
95   PetscCall(MatDestroy_SeqAIJ(A));
96   PetscFunctionReturn(0);
97 }
98 
99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
100  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
101  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
102  * handle, creates a new one, and then calls mkl_sparse_optimize().
103  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
104  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
105  * an unoptimized matrix handle here. */
106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
107 {
108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
109   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
110    * does nothing. We make it callable anyway in this case because it cuts
111    * down on littering the code with #ifdefs. */
112   PetscFunctionBegin;
113   PetscFunctionReturn(0);
114 #else
115   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
116   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
117   PetscInt         m,n;
118   MatScalar        *aa;
119   PetscInt         *aj,*ai;
120 
121   PetscFunctionBegin;
122 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
123   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
124    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
125    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
126    * mkl_sparse_optimize() later. */
127   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
128 #endif
129 
130   if (aijmkl->sparse_optimized) {
131     /* Matrix has been previously assembled and optimized. Must destroy old
132      * matrix handle before running the optimization step again. */
133     PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
134   }
135   aijmkl->sparse_optimized = PETSC_FALSE;
136 
137   /* Now perform the SpMV2 setup and matrix optimization. */
138   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
139   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
140   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
141   m = A->rmap->n;
142   n = A->cmap->n;
143   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
144   aa   = a->a;  /* Nonzero elements stored row-by-row. */
145   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
146   if (a->nz && aa && !A->structure_only) {
147     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
148      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
149     PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
150     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
151     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
152     if (!aijmkl->no_SpMV2) {
153       PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
154     }
155     aijmkl->sparse_optimized = PETSC_TRUE;
156     PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
157   } else {
158     aijmkl->csrA = PETSC_NULL;
159   }
160 
161   PetscFunctionReturn(0);
162 #endif
163 }
164 
165 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
166 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
167 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
168 {
169   sparse_index_base_t indexing;
170   PetscInt            m,n;
171   PetscInt            *aj,*ai,*dummy;
172   MatScalar           *aa;
173   Mat_SeqAIJMKL       *aijmkl;
174 
175   PetscFunctionBegin;
176   if (csrA) {
177     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
178     PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
179     PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
180   } else {
181     aj = ai = PETSC_NULL;
182     aa = PETSC_NULL;
183   }
184 
185   PetscCall(MatSetType(A,MATSEQAIJ));
186   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols));
187   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
188    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
189    * they will be destroyed when the MKL handle is destroyed.
190    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
191   if (csrA) {
192     PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL));
193   } else {
194     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
195     PetscCall(MatSetUp(A));
196     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
197     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
198   }
199 
200   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
201    * Now turn it into a MATSEQAIJMKL. */
202   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
203 
204   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
205   aijmkl->csrA = csrA;
206 
207   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
208    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
209    * and just need to be able to run the MKL optimization step. */
210   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
211   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
212   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
213   if (csrA) {
214     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
215     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
216   }
217   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
218   PetscFunctionReturn(0);
219 }
220 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
221 
222 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
223  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
224  * MatMatMultNumeric(). */
225 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
226 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
227 {
228   PetscInt            i;
229   PetscInt            nrows,ncols;
230   PetscInt            nz;
231   PetscInt            *ai,*aj,*dummy;
232   PetscScalar         *aa;
233   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
234   sparse_index_base_t indexing;
235 
236   PetscFunctionBegin;
237   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
238   if (!aijmkl->csrA) PetscFunctionReturn(0);
239 
240   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
241   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
242 
243   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
244    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
245   for (i=0; i<nrows; i++) {
246     nz = ai[i+1] - ai[i];
247     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES));
248   }
249 
250   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
251   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
252 
253   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
254   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
255    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
256    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
257   aijmkl->sparse_optimized = PETSC_FALSE;
258   PetscFunctionReturn(0);
259 }
260 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
261 
262 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
263 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
264 {
265   PetscInt            i,j,k;
266   PetscInt            nrows,ncols;
267   PetscInt            nz;
268   PetscInt            *ai,*aj,*dummy;
269   PetscScalar         *aa;
270   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
271   sparse_index_base_t indexing;
272 
273   PetscFunctionBegin;
274   PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
275 
276   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
277   if (!aijmkl->csrA) {
278     PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n"));
279     PetscFunctionReturn(0);
280   }
281 
282   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
283   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
284 
285   k = 0;
286   for (i=0; i<nrows; i++) {
287     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i));
288     nz = ai[i+1] - ai[i];
289     for (j=0; j<nz; j++) {
290       if (aa) {
291         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],PetscRealPart(aa[k])));
292       } else {
293         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]));
294       }
295       k++;
296     }
297     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
298   }
299   PetscFunctionReturn(0);
300 }
301 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
302 
303 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
304 {
305   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
306   Mat_SeqAIJMKL  *aijmkl_dest;
307 
308   PetscFunctionBegin;
309   PetscCall(MatDuplicate_SeqAIJ(A,op,M));
310   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
311   PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1));
312   aijmkl_dest->sparse_optimized = PETSC_FALSE;
313   if (aijmkl->eager_inspection) {
314     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
320 {
321   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
322   Mat_SeqAIJMKL   *aijmkl;
323 
324   PetscFunctionBegin;
325   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
326 
327   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
328    * extra information and some different methods, call the AssemblyEnd
329    * routine for a MATSEQAIJ.
330    * I'm not sure if this is the best way to do this, but it avoids
331    * a lot of code duplication. */
332   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
333   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
334 
335   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
336    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
337   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
338   if (aijmkl->eager_inspection) {
339     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
340   }
341 
342   PetscFunctionReturn(0);
343 }
344 
345 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
346 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
347 {
348   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
349   const PetscScalar *x;
350   PetscScalar       *y;
351   const MatScalar   *aa;
352   PetscInt          m = A->rmap->n;
353   PetscInt          n = A->cmap->n;
354   PetscScalar       alpha = 1.0;
355   PetscScalar       beta = 0.0;
356   const PetscInt    *aj,*ai;
357   char              matdescra[6];
358 
359   /* Variables not in MatMult_SeqAIJ. */
360   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
361 
362   PetscFunctionBegin;
363   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
364   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
365   PetscCall(VecGetArrayRead(xx,&x));
366   PetscCall(VecGetArray(yy,&y));
367   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
368   aa   = a->a;  /* Nonzero elements stored row-by-row. */
369   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
370 
371   /* Call MKL sparse BLAS routine to do the MatMult. */
372   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
373 
374   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
375   PetscCall(VecRestoreArrayRead(xx,&x));
376   PetscCall(VecRestoreArray(yy,&y));
377   PetscFunctionReturn(0);
378 }
379 #endif
380 
381 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
382 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
383 {
384   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
385   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
386   const PetscScalar *x;
387   PetscScalar       *y;
388   PetscObjectState  state;
389 
390   PetscFunctionBegin;
391 
392   /* If there are no nonzero entries, zero yy and return immediately. */
393   if (!a->nz) {
394     PetscCall(VecGetArray(yy,&y));
395     PetscCall(PetscArrayzero(y,A->rmap->n));
396     PetscCall(VecRestoreArray(yy,&y));
397     PetscFunctionReturn(0);
398   }
399 
400   PetscCall(VecGetArrayRead(xx,&x));
401   PetscCall(VecGetArray(yy,&y));
402 
403   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
404    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
405    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
406   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
407   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
408 
409   /* Call MKL SpMV2 executor routine to do the MatMult. */
410   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
411 
412   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
413   PetscCall(VecRestoreArrayRead(xx,&x));
414   PetscCall(VecRestoreArray(yy,&y));
415   PetscFunctionReturn(0);
416 }
417 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
418 
419 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
420 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
421 {
422   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
423   const PetscScalar *x;
424   PetscScalar       *y;
425   const MatScalar   *aa;
426   PetscInt          m = A->rmap->n;
427   PetscInt          n = A->cmap->n;
428   PetscScalar       alpha = 1.0;
429   PetscScalar       beta = 0.0;
430   const PetscInt    *aj,*ai;
431   char              matdescra[6];
432 
433   /* Variables not in MatMultTranspose_SeqAIJ. */
434   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
435 
436   PetscFunctionBegin;
437   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
438   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
439   PetscCall(VecGetArrayRead(xx,&x));
440   PetscCall(VecGetArray(yy,&y));
441   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
442   aa   = a->a;  /* Nonzero elements stored row-by-row. */
443   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
444 
445   /* Call MKL sparse BLAS routine to do the MatMult. */
446   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
447 
448   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
449   PetscCall(VecRestoreArrayRead(xx,&x));
450   PetscCall(VecRestoreArray(yy,&y));
451   PetscFunctionReturn(0);
452 }
453 #endif
454 
455 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
456 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
457 {
458   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
459   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
460   const PetscScalar *x;
461   PetscScalar       *y;
462   PetscObjectState  state;
463 
464   PetscFunctionBegin;
465 
466   /* If there are no nonzero entries, zero yy and return immediately. */
467   if (!a->nz) {
468     PetscCall(VecGetArray(yy,&y));
469     PetscCall(PetscArrayzero(y,A->cmap->n));
470     PetscCall(VecRestoreArray(yy,&y));
471     PetscFunctionReturn(0);
472   }
473 
474   PetscCall(VecGetArrayRead(xx,&x));
475   PetscCall(VecGetArray(yy,&y));
476 
477   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
478    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
479    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
480   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
481   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
482 
483   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
484   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
485 
486   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
487   PetscCall(VecRestoreArrayRead(xx,&x));
488   PetscCall(VecRestoreArray(yy,&y));
489   PetscFunctionReturn(0);
490 }
491 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
492 
493 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
494 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
495 {
496   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
497   const PetscScalar *x;
498   PetscScalar       *y,*z;
499   const MatScalar   *aa;
500   PetscInt          m = A->rmap->n;
501   PetscInt          n = A->cmap->n;
502   const PetscInt    *aj,*ai;
503   PetscInt          i;
504 
505   /* Variables not in MatMultAdd_SeqAIJ. */
506   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
507   PetscScalar       alpha = 1.0;
508   PetscScalar       beta;
509   char              matdescra[6];
510 
511   PetscFunctionBegin;
512   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
513   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
514 
515   PetscCall(VecGetArrayRead(xx,&x));
516   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
517   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
518   aa   = a->a;  /* Nonzero elements stored row-by-row. */
519   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
520 
521   /* Call MKL sparse BLAS routine to do the MatMult. */
522   if (zz == yy) {
523     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
524     beta = 1.0;
525     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
526   } else {
527     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
528      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
529     beta = 0.0;
530     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
531     for (i=0; i<m; i++) {
532       z[i] += y[i];
533     }
534   }
535 
536   PetscCall(PetscLogFlops(2.0*a->nz));
537   PetscCall(VecRestoreArrayRead(xx,&x));
538   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
539   PetscFunctionReturn(0);
540 }
541 #endif
542 
543 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
544 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
545 {
546   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
547   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
548   const PetscScalar *x;
549   PetscScalar       *y,*z;
550   PetscInt          m = A->rmap->n;
551   PetscInt          i;
552 
553   /* Variables not in MatMultAdd_SeqAIJ. */
554   PetscObjectState  state;
555 
556   PetscFunctionBegin;
557 
558   /* If there are no nonzero entries, set zz = yy and return immediately. */
559   if (!a->nz) {
560     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
561     PetscCall(PetscArraycpy(z,y,m));
562     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
563     PetscFunctionReturn(0);
564   }
565 
566   PetscCall(VecGetArrayRead(xx,&x));
567   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
568 
569   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
570    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
571    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
572   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
573   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
574 
575   /* Call MKL sparse BLAS routine to do the MatMult. */
576   if (zz == yy) {
577     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
578      * with alpha and beta both set to 1.0. */
579     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
580   } else {
581     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
582      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
583     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
584     for (i=0; i<m; i++) z[i] += y[i];
585   }
586 
587   PetscCall(PetscLogFlops(2.0*a->nz));
588   PetscCall(VecRestoreArrayRead(xx,&x));
589   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
590   PetscFunctionReturn(0);
591 }
592 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
593 
594 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
595 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
598   const PetscScalar *x;
599   PetscScalar       *y,*z;
600   const MatScalar   *aa;
601   PetscInt          m = A->rmap->n;
602   PetscInt          n = A->cmap->n;
603   const PetscInt    *aj,*ai;
604   PetscInt          i;
605 
606   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
607   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
608   PetscScalar       alpha = 1.0;
609   PetscScalar       beta;
610   char              matdescra[6];
611 
612   PetscFunctionBegin;
613   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
614   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
615 
616   PetscCall(VecGetArrayRead(xx,&x));
617   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
618   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
619   aa   = a->a;  /* Nonzero elements stored row-by-row. */
620   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
621 
622   /* Call MKL sparse BLAS routine to do the MatMult. */
623   if (zz == yy) {
624     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
625     beta = 1.0;
626     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
627   } else {
628     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
629      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
630     beta = 0.0;
631     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
632     for (i=0; i<n; i++) {
633       z[i] += y[i];
634     }
635   }
636 
637   PetscCall(PetscLogFlops(2.0*a->nz));
638   PetscCall(VecRestoreArrayRead(xx,&x));
639   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
640   PetscFunctionReturn(0);
641 }
642 #endif
643 
644 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
645 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
646 {
647   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
648   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
649   const PetscScalar *x;
650   PetscScalar       *y,*z;
651   PetscInt          n = A->cmap->n;
652   PetscInt          i;
653   PetscObjectState  state;
654 
655   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
656 
657   PetscFunctionBegin;
658 
659   /* If there are no nonzero entries, set zz = yy and return immediately. */
660   if (!a->nz) {
661     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
662     PetscCall(PetscArraycpy(z,y,n));
663     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
664     PetscFunctionReturn(0);
665   }
666 
667   PetscCall(VecGetArrayRead(xx,&x));
668   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
669 
670   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
671    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
672    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
673   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
674   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
675 
676   /* Call MKL sparse BLAS routine to do the MatMult. */
677   if (zz == yy) {
678     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
679      * with alpha and beta both set to 1.0. */
680     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
681   } else {
682     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
683      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
684     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
685     for (i=0; i<n; i++) z[i] += y[i];
686   }
687 
688   PetscCall(PetscLogFlops(2.0*a->nz));
689   PetscCall(VecRestoreArrayRead(xx,&x));
690   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
691   PetscFunctionReturn(0);
692 }
693 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
694 
695 /* -------------------------- MatProduct code -------------------------- */
696 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
697 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
698 {
699   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
700   sparse_matrix_t     csrA,csrB,csrC;
701   PetscInt            nrows,ncols;
702   struct matrix_descr descr_type_gen;
703   PetscObjectState    state;
704 
705   PetscFunctionBegin;
706   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
707    * not handle sparse matrices with zero rows or columns. */
708   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
709   else nrows = A->cmap->N;
710   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
711   else ncols = B->rmap->N;
712 
713   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
714   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
715   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
716   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
717   csrA = a->csrA;
718   csrB = b->csrA;
719   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
720 
721   if (csrA && csrB) {
722     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
723   } else {
724     csrC = PETSC_NULL;
725   }
726 
727   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C));
728 
729   PetscFunctionReturn(0);
730 }
731 
732 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
733 {
734   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
735   sparse_matrix_t     csrA, csrB, csrC;
736   struct matrix_descr descr_type_gen;
737   PetscObjectState    state;
738 
739   PetscFunctionBegin;
740   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
741   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
742   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
743   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
744   csrA = a->csrA;
745   csrB = b->csrA;
746   csrC = c->csrA;
747   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
748 
749   if (csrA && csrB) {
750     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
751   } else {
752     csrC = PETSC_NULL;
753   }
754 
755   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
756   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
757 
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
762 {
763   PetscFunctionBegin;
764   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
769 {
770   PetscFunctionBegin;
771   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
772   PetscFunctionReturn(0);
773 }
774 
775 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
776 {
777   PetscFunctionBegin;
778   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
779   PetscFunctionReturn(0);
780 }
781 
782 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
783 {
784   PetscFunctionBegin;
785   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
790 {
791   PetscFunctionBegin;
792   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
793   PetscFunctionReturn(0);
794 }
795 
796 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
797 {
798   PetscFunctionBegin;
799   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
800   PetscFunctionReturn(0);
801 }
802 
803 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
804 {
805   Mat_Product    *product = C->product;
806   Mat            A = product->A,B = product->B;
807 
808   PetscFunctionBegin;
809   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C));
810   PetscFunctionReturn(0);
811 }
812 
813 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
814 {
815   Mat_Product    *product = C->product;
816   Mat            A = product->A,B = product->B;
817   PetscReal      fill = product->fill;
818 
819   PetscFunctionBegin;
820   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C));
821   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
822   PetscFunctionReturn(0);
823 }
824 
825 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
826 {
827   Mat                 Ct;
828   Vec                 zeros;
829   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
830   sparse_matrix_t     csrA, csrP, csrC;
831   PetscBool           set, flag;
832   struct matrix_descr descr_type_sym;
833   PetscObjectState    state;
834 
835   PetscFunctionBegin;
836   PetscCall(MatIsSymmetricKnown(A,&set,&flag));
837   PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
838 
839   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
840   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
841   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
842   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
843   csrA = a->csrA;
844   csrP = p->csrA;
845   csrC = c->csrA;
846   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
847   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
848   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
849 
850   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
851   PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
852 
853   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
854    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
855    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
856    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
857    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
858    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
859    * the full matrix. */
860   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
861   PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct));
862   PetscCall(MatCreateVecs(C,&zeros,NULL));
863   PetscCall(VecSetFromOptions(zeros));
864   PetscCall(VecZeroEntries(zeros));
865   PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES));
866   PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN));
867   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
868   PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C));
869   PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
870   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
871   PetscCall(VecDestroy(&zeros));
872   PetscCall(MatDestroy(&Ct));
873   PetscFunctionReturn(0);
874 }
875 
876 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
877 {
878   Mat_Product         *product = C->product;
879   Mat                 A = product->A,P = product->B;
880   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
881   sparse_matrix_t     csrA,csrP,csrC;
882   struct matrix_descr descr_type_sym;
883   PetscObjectState    state;
884 
885   PetscFunctionBegin;
886   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
887   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
888   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
889   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
890   csrA = a->csrA;
891   csrP = p->csrA;
892   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
893   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
894   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
895 
896   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
897   if (csrP && csrA) {
898     PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
899   } else {
900     csrC = PETSC_NULL;
901   }
902 
903   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
904    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
905    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
906    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
907    * is guaranteed. */
908   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C));
909 
910   C->ops->productnumeric = MatProductNumeric_PtAP;
911   PetscFunctionReturn(0);
912 }
913 
914 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
915 {
916   PetscFunctionBegin;
917   C->ops->productsymbolic = MatProductSymbolic_AB;
918   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
919   PetscFunctionReturn(0);
920 }
921 
922 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
923 {
924   PetscFunctionBegin;
925   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
926   PetscFunctionReturn(0);
927 }
928 
929 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
930 {
931   PetscFunctionBegin;
932   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
933   C->ops->productsymbolic          = MatProductSymbolic_ABt;
934   PetscFunctionReturn(0);
935 }
936 
937 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
938 {
939   Mat_Product    *product = C->product;
940   Mat            A = product->A;
941   PetscBool      set, flag;
942 
943   PetscFunctionBegin;
944   if (PetscDefined(USE_COMPLEX)) {
945     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
946      * We do this in several other locations in this file. This works for the time being, but these
947      * routines are considered unsafe and may be removed from the MatProduct code in the future.
948      * TODO: Add proper MATSEQAIJMKL implementations */
949     C->ops->productsymbolic = NULL;
950   } else {
951     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
952     PetscCall(MatIsSymmetricKnown(A,&set,&flag));
953     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
954     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
955     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
956      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
957   }
958   PetscFunctionReturn(0);
959 }
960 
961 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
962 {
963   PetscFunctionBegin;
964   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
965   PetscFunctionReturn(0);
966 }
967 
968 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
969 {
970   PetscFunctionBegin;
971   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
972   PetscFunctionReturn(0);
973 }
974 
975 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
976 {
977   Mat_Product    *product = C->product;
978 
979   PetscFunctionBegin;
980   switch (product->type) {
981   case MATPRODUCT_AB:
982     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
983     break;
984   case MATPRODUCT_AtB:
985     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
986     break;
987   case MATPRODUCT_ABt:
988     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
989     break;
990   case MATPRODUCT_PtAP:
991     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
992     break;
993   case MATPRODUCT_RARt:
994     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
995     break;
996   case MATPRODUCT_ABC:
997     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
998     break;
999   default:
1000     break;
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1005 /* ------------------------ End MatProduct code ------------------------ */
1006 
1007 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1008  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1009  * routine, but can also be used to convert an assembled SeqAIJ matrix
1010  * into a SeqAIJMKL one. */
1011 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1012 {
1013   Mat            B = *newmat;
1014   Mat_SeqAIJMKL  *aijmkl;
1015   PetscBool      set;
1016   PetscBool      sametype;
1017 
1018   PetscFunctionBegin;
1019   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
1020 
1021   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1022   if (sametype) PetscFunctionReturn(0);
1023 
1024   PetscCall(PetscNewLog(B,&aijmkl));
1025   B->spptr = (void*) aijmkl;
1026 
1027   /* Set function pointers for methods that we inherit from AIJ but override.
1028    * We also parse some command line options below, since those determine some of the methods we point to. */
1029   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1030   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1031   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1032 
1033   aijmkl->sparse_optimized = PETSC_FALSE;
1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1036 #else
1037   aijmkl->no_SpMV2 = PETSC_TRUE;
1038 #endif
1039   aijmkl->eager_inspection = PETSC_FALSE;
1040 
1041   /* Parse command line options. */
1042   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
1043   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set));
1044   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set));
1045   PetscOptionsEnd();
1046 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1047   if (!aijmkl->no_SpMV2) {
1048     PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1049     aijmkl->no_SpMV2 = PETSC_TRUE;
1050   }
1051 #endif
1052 
1053 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1054   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1055   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1056   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1057   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1058 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1059   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1060   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1061   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1062   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1063   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1064 #   if !defined(PETSC_USE_COMPLEX)
1065   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1066 #   else
1067   B->ops->ptapnumeric             = NULL;
1068 #   endif
1069 # endif
1070 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1071 
1072 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1073   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1074    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1075    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1076    * versions in which the older interface has not been deprecated, we use the old interface. */
1077   if (aijmkl->no_SpMV2) {
1078     B->ops->mult             = MatMult_SeqAIJMKL;
1079     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1080     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1081     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1082   }
1083 #endif
1084 
1085   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ));
1086 
1087   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL));
1088   *newmat = B;
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*@C
1093    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1094    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1095    routines from Intel MKL whenever possible.
1096    If the installed version of MKL supports the "SpMV2" sparse
1097    inspector-executor routines, then those are used by default.
1098    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1099    symmetric A) operations are currently supported.
1100    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1101 
1102    Collective
1103 
1104    Input Parameters:
1105 +  comm - MPI communicator, set to PETSC_COMM_SELF
1106 .  m - number of rows
1107 .  n - number of columns
1108 .  nz - number of nonzeros per row (same for all rows)
1109 -  nnz - array containing the number of nonzeros in the various rows
1110          (possibly different for each row) or NULL
1111 
1112    Output Parameter:
1113 .  A - the matrix
1114 
1115    Options Database Keys:
1116 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1117 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1118 
1119    Notes:
1120    If nnz is given then nz is ignored
1121 
1122    Level: intermediate
1123 
1124 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1125 @*/
1126 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1127 {
1128   PetscFunctionBegin;
1129   PetscCall(MatCreate(comm,A));
1130   PetscCall(MatSetSizes(*A,m,n,m,n));
1131   PetscCall(MatSetType(*A,MATSEQAIJMKL));
1132   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1137 {
1138   PetscFunctionBegin;
1139   PetscCall(MatSetType(A,MATSEQAIJ));
1140   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
1141   PetscFunctionReturn(0);
1142 }
1143