xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 984ed092ecc7313e63e6aa733432dfb60d929fa3)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   Mat            B       = *newmat;
32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
34 #endif
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
38 
39   /* Reset the original function pointers. */
40   B->ops->duplicate               = MatDuplicate_SeqAIJ;
41   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
42   B->ops->destroy                 = MatDestroy_SeqAIJ;
43   B->ops->mult                    = MatMult_SeqAIJ;
44   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
45   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53 
54   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL));
55 
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58    * simply involves destroying the MKL sparse matrix handle and then freeing
59    * the spptr pointer. */
60   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61 
62   if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64   PetscCall(PetscFree(B->spptr));
65 
66   /* Change the type of B to MATSEQAIJ. */
67   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
68 
69   *newmat = B;
70   PetscFunctionReturn(0);
71 }
72 
73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
76 
77   PetscFunctionBegin;
78 
79   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80   if (aijmkl) {
81     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
83     if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
85     PetscCall(PetscFree(A->spptr));
86   }
87 
88   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
89    * to destroy everything that remains. */
90   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
91   /* Note that I don't call MatSetType().  I believe this is because that
92    * is only to be called when *building* a matrix.  I could be wrong, but
93    * that is how things work for the SuperLU matrix class. */
94   PetscCall(MatDestroy_SeqAIJ(A));
95   PetscFunctionReturn(0);
96 }
97 
98 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
99  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
100  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
101  * handle, creates a new one, and then calls mkl_sparse_optimize().
102  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
103  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
104  * an unoptimized matrix handle here. */
105 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
106 {
107 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
108   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
109    * does nothing. We make it callable anyway in this case because it cuts
110    * down on littering the code with #ifdefs. */
111   PetscFunctionBegin;
112   PetscFunctionReturn(0);
113 #else
114   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
115   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
116   PetscInt         m,n;
117   MatScalar        *aa;
118   PetscInt         *aj,*ai;
119 
120   PetscFunctionBegin;
121 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125    * mkl_sparse_optimize() later. */
126   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
127 #endif
128 
129   if (aijmkl->sparse_optimized) {
130     /* Matrix has been previously assembled and optimized. Must destroy old
131      * matrix handle before running the optimization step again. */
132     PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
133   }
134   aijmkl->sparse_optimized = PETSC_FALSE;
135 
136   /* Now perform the SpMV2 setup and matrix optimization. */
137   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
140   m = A->rmap->n;
141   n = A->cmap->n;
142   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
143   aa   = a->a;  /* Nonzero elements stored row-by-row. */
144   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
145   if (a->nz && aa && !A->structure_only) {
146     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
147      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148     PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
149     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
150     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
151     if (!aijmkl->no_SpMV2) {
152       PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
153     }
154     aijmkl->sparse_optimized = PETSC_TRUE;
155     PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
156   } else {
157     aijmkl->csrA = PETSC_NULL;
158   }
159 
160   PetscFunctionReturn(0);
161 #endif
162 }
163 
164 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
165 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
166 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
167 {
168   sparse_index_base_t indexing;
169   PetscInt            m,n;
170   PetscInt            *aj,*ai,*dummy;
171   MatScalar           *aa;
172   Mat_SeqAIJMKL       *aijmkl;
173 
174   PetscFunctionBegin;
175   if (csrA) {
176     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
177     PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
178     PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
179   } else {
180     aj = ai = PETSC_NULL;
181     aa = PETSC_NULL;
182   }
183 
184   PetscCall(MatSetType(A,MATSEQAIJ));
185   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols));
186   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
187    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
188    * they will be destroyed when the MKL handle is destroyed.
189    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
190   if (csrA) {
191     PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL));
192   } else {
193     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
194     PetscCall(MatSetUp(A));
195     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
196     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
197   }
198 
199   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
200    * Now turn it into a MATSEQAIJMKL. */
201   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
202 
203   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
204   aijmkl->csrA = csrA;
205 
206   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
207    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
208    * and just need to be able to run the MKL optimization step. */
209   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
210   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
211   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
212   if (csrA) {
213     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
214     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
215   }
216   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
217   PetscFunctionReturn(0);
218 }
219 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
220 
221 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
222  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
223  * MatMatMultNumeric(). */
224 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
225 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
226 {
227   PetscInt            i;
228   PetscInt            nrows,ncols;
229   PetscInt            nz;
230   PetscInt            *ai,*aj,*dummy;
231   PetscScalar         *aa;
232   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
233   sparse_index_base_t indexing;
234 
235   PetscFunctionBegin;
236   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
237   if (!aijmkl->csrA) PetscFunctionReturn(0);
238 
239   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
240   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
241 
242   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
243    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
244   for (i=0; i<nrows; i++) {
245     nz = ai[i+1] - ai[i];
246     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES));
247   }
248 
249   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
250   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
251 
252   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
253   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
254    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
255    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
256   aijmkl->sparse_optimized = PETSC_FALSE;
257   PetscFunctionReturn(0);
258 }
259 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
260 
261 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
262 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
263 {
264   PetscInt            i,j,k;
265   PetscInt            nrows,ncols;
266   PetscInt            nz;
267   PetscInt            *ai,*aj,*dummy;
268   PetscScalar         *aa;
269   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
270   sparse_index_base_t indexing;
271 
272   PetscFunctionBegin;
273   PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
274 
275   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
276   if (!aijmkl->csrA) {
277     PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n"));
278     PetscFunctionReturn(0);
279   }
280 
281   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
282   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
283 
284   k = 0;
285   for (i=0; i<nrows; i++) {
286     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i));
287     nz = ai[i+1] - ai[i];
288     for (j=0; j<nz; j++) {
289       if (aa) {
290         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],PetscRealPart(aa[k])));
291       } else {
292         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]));
293       }
294       k++;
295     }
296     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
297   }
298   PetscFunctionReturn(0);
299 }
300 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
301 
302 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
303 {
304   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
305   Mat_SeqAIJMKL  *aijmkl_dest;
306 
307   PetscFunctionBegin;
308   PetscCall(MatDuplicate_SeqAIJ(A,op,M));
309   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
310   PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1));
311   aijmkl_dest->sparse_optimized = PETSC_FALSE;
312   if (aijmkl->eager_inspection) {
313     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
319 {
320   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
321   Mat_SeqAIJMKL   *aijmkl;
322 
323   PetscFunctionBegin;
324   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
325 
326   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
327    * extra information and some different methods, call the AssemblyEnd
328    * routine for a MATSEQAIJ.
329    * I'm not sure if this is the best way to do this, but it avoids
330    * a lot of code duplication. */
331   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
332   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
333 
334   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
335    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
336   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
337   if (aijmkl->eager_inspection) {
338     PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
339   }
340 
341   PetscFunctionReturn(0);
342 }
343 
344 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
345 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
346 {
347   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
348   const PetscScalar *x;
349   PetscScalar       *y;
350   const MatScalar   *aa;
351   PetscInt          m = A->rmap->n;
352   PetscInt          n = A->cmap->n;
353   PetscScalar       alpha = 1.0;
354   PetscScalar       beta = 0.0;
355   const PetscInt    *aj,*ai;
356   char              matdescra[6];
357 
358   /* Variables not in MatMult_SeqAIJ. */
359   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
360 
361   PetscFunctionBegin;
362   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
363   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
364   PetscCall(VecGetArrayRead(xx,&x));
365   PetscCall(VecGetArray(yy,&y));
366   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
367   aa   = a->a;  /* Nonzero elements stored row-by-row. */
368   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
369 
370   /* Call MKL sparse BLAS routine to do the MatMult. */
371   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
372 
373   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
374   PetscCall(VecRestoreArrayRead(xx,&x));
375   PetscCall(VecRestoreArray(yy,&y));
376   PetscFunctionReturn(0);
377 }
378 #endif
379 
380 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
381 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
382 {
383   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
384   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
385   const PetscScalar *x;
386   PetscScalar       *y;
387   PetscObjectState  state;
388 
389   PetscFunctionBegin;
390 
391   /* If there are no nonzero entries, zero yy and return immediately. */
392   if (!a->nz) {
393     PetscCall(VecGetArray(yy,&y));
394     PetscCall(PetscArrayzero(y,A->rmap->n));
395     PetscCall(VecRestoreArray(yy,&y));
396     PetscFunctionReturn(0);
397   }
398 
399   PetscCall(VecGetArrayRead(xx,&x));
400   PetscCall(VecGetArray(yy,&y));
401 
402   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
403    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
404    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
405   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
406   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
407 
408   /* Call MKL SpMV2 executor routine to do the MatMult. */
409   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
410 
411   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
412   PetscCall(VecRestoreArrayRead(xx,&x));
413   PetscCall(VecRestoreArray(yy,&y));
414   PetscFunctionReturn(0);
415 }
416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
417 
418 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
419 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
420 {
421   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
422   const PetscScalar *x;
423   PetscScalar       *y;
424   const MatScalar   *aa;
425   PetscInt          m = A->rmap->n;
426   PetscInt          n = A->cmap->n;
427   PetscScalar       alpha = 1.0;
428   PetscScalar       beta = 0.0;
429   const PetscInt    *aj,*ai;
430   char              matdescra[6];
431 
432   /* Variables not in MatMultTranspose_SeqAIJ. */
433   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
434 
435   PetscFunctionBegin;
436   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
437   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
438   PetscCall(VecGetArrayRead(xx,&x));
439   PetscCall(VecGetArray(yy,&y));
440   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
441   aa   = a->a;  /* Nonzero elements stored row-by-row. */
442   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
443 
444   /* Call MKL sparse BLAS routine to do the MatMult. */
445   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
446 
447   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
448   PetscCall(VecRestoreArrayRead(xx,&x));
449   PetscCall(VecRestoreArray(yy,&y));
450   PetscFunctionReturn(0);
451 }
452 #endif
453 
454 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
455 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
456 {
457   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
458   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
459   const PetscScalar *x;
460   PetscScalar       *y;
461   PetscObjectState  state;
462 
463   PetscFunctionBegin;
464 
465   /* If there are no nonzero entries, zero yy and return immediately. */
466   if (!a->nz) {
467     PetscCall(VecGetArray(yy,&y));
468     PetscCall(PetscArrayzero(y,A->cmap->n));
469     PetscCall(VecRestoreArray(yy,&y));
470     PetscFunctionReturn(0);
471   }
472 
473   PetscCall(VecGetArrayRead(xx,&x));
474   PetscCall(VecGetArray(yy,&y));
475 
476   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
477    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
478    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
479   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
480   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
481 
482   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
483   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
484 
485   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
486   PetscCall(VecRestoreArrayRead(xx,&x));
487   PetscCall(VecRestoreArray(yy,&y));
488   PetscFunctionReturn(0);
489 }
490 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
491 
492 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
493 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
494 {
495   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
496   const PetscScalar *x;
497   PetscScalar       *y,*z;
498   const MatScalar   *aa;
499   PetscInt          m = A->rmap->n;
500   PetscInt          n = A->cmap->n;
501   const PetscInt    *aj,*ai;
502   PetscInt          i;
503 
504   /* Variables not in MatMultAdd_SeqAIJ. */
505   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
506   PetscScalar       alpha = 1.0;
507   PetscScalar       beta;
508   char              matdescra[6];
509 
510   PetscFunctionBegin;
511   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
512   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
513 
514   PetscCall(VecGetArrayRead(xx,&x));
515   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
516   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
517   aa   = a->a;  /* Nonzero elements stored row-by-row. */
518   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
519 
520   /* Call MKL sparse BLAS routine to do the MatMult. */
521   if (zz == yy) {
522     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
523     beta = 1.0;
524     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
525   } else {
526     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
527      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
528     beta = 0.0;
529     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
530     for (i=0; i<m; i++) {
531       z[i] += y[i];
532     }
533   }
534 
535   PetscCall(PetscLogFlops(2.0*a->nz));
536   PetscCall(VecRestoreArrayRead(xx,&x));
537   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
538   PetscFunctionReturn(0);
539 }
540 #endif
541 
542 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
543 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
544 {
545   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
546   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
547   const PetscScalar *x;
548   PetscScalar       *y,*z;
549   PetscInt          m = A->rmap->n;
550   PetscInt          i;
551 
552   /* Variables not in MatMultAdd_SeqAIJ. */
553   PetscObjectState  state;
554 
555   PetscFunctionBegin;
556 
557   /* If there are no nonzero entries, set zz = yy and return immediately. */
558   if (!a->nz) {
559     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
560     PetscCall(PetscArraycpy(z,y,m));
561     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
562     PetscFunctionReturn(0);
563   }
564 
565   PetscCall(VecGetArrayRead(xx,&x));
566   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
567 
568   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
569    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
570    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
571   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
572   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
573 
574   /* Call MKL sparse BLAS routine to do the MatMult. */
575   if (zz == yy) {
576     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
577      * with alpha and beta both set to 1.0. */
578     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
579   } else {
580     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
581      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
582     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
583     for (i=0; i<m; i++) z[i] += y[i];
584   }
585 
586   PetscCall(PetscLogFlops(2.0*a->nz));
587   PetscCall(VecRestoreArrayRead(xx,&x));
588   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
589   PetscFunctionReturn(0);
590 }
591 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
592 
593 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
594 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
595 {
596   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
597   const PetscScalar *x;
598   PetscScalar       *y,*z;
599   const MatScalar   *aa;
600   PetscInt          m = A->rmap->n;
601   PetscInt          n = A->cmap->n;
602   const PetscInt    *aj,*ai;
603   PetscInt          i;
604 
605   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
606   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
607   PetscScalar       alpha = 1.0;
608   PetscScalar       beta;
609   char              matdescra[6];
610 
611   PetscFunctionBegin;
612   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
613   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
614 
615   PetscCall(VecGetArrayRead(xx,&x));
616   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
617   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
618   aa   = a->a;  /* Nonzero elements stored row-by-row. */
619   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
620 
621   /* Call MKL sparse BLAS routine to do the MatMult. */
622   if (zz == yy) {
623     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
624     beta = 1.0;
625     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
626   } else {
627     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
628      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
629     beta = 0.0;
630     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
631     for (i=0; i<n; i++) {
632       z[i] += y[i];
633     }
634   }
635 
636   PetscCall(PetscLogFlops(2.0*a->nz));
637   PetscCall(VecRestoreArrayRead(xx,&x));
638   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
639   PetscFunctionReturn(0);
640 }
641 #endif
642 
643 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
644 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
645 {
646   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
647   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
648   const PetscScalar *x;
649   PetscScalar       *y,*z;
650   PetscInt          n = A->cmap->n;
651   PetscInt          i;
652   PetscObjectState  state;
653 
654   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
655 
656   PetscFunctionBegin;
657 
658   /* If there are no nonzero entries, set zz = yy and return immediately. */
659   if (!a->nz) {
660     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
661     PetscCall(PetscArraycpy(z,y,n));
662     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
663     PetscFunctionReturn(0);
664   }
665 
666   PetscCall(VecGetArrayRead(xx,&x));
667   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
668 
669   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
670    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
671    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
672   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
673   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
674 
675   /* Call MKL sparse BLAS routine to do the MatMult. */
676   if (zz == yy) {
677     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
678      * with alpha and beta both set to 1.0. */
679     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
680   } else {
681     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
682      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
683     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
684     for (i=0; i<n; i++) z[i] += y[i];
685   }
686 
687   PetscCall(PetscLogFlops(2.0*a->nz));
688   PetscCall(VecRestoreArrayRead(xx,&x));
689   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
690   PetscFunctionReturn(0);
691 }
692 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
693 
694 /* -------------------------- MatProduct code -------------------------- */
695 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
696 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
697 {
698   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
699   sparse_matrix_t     csrA,csrB,csrC;
700   PetscInt            nrows,ncols;
701   struct matrix_descr descr_type_gen;
702   PetscObjectState    state;
703 
704   PetscFunctionBegin;
705   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
706    * not handle sparse matrices with zero rows or columns. */
707   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
708   else nrows = A->cmap->N;
709   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
710   else ncols = B->rmap->N;
711 
712   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
713   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
714   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
715   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
716   csrA = a->csrA;
717   csrB = b->csrA;
718   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
719 
720   if (csrA && csrB) {
721     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
722   } else {
723     csrC = PETSC_NULL;
724   }
725 
726   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C));
727 
728   PetscFunctionReturn(0);
729 }
730 
731 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
732 {
733   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
734   sparse_matrix_t     csrA, csrB, csrC;
735   struct matrix_descr descr_type_gen;
736   PetscObjectState    state;
737 
738   PetscFunctionBegin;
739   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
740   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
741   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
742   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
743   csrA = a->csrA;
744   csrB = b->csrA;
745   csrC = c->csrA;
746   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
747 
748   if (csrA && csrB) {
749     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
750   } else {
751     csrC = PETSC_NULL;
752   }
753 
754   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
755   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
756 
757   PetscFunctionReturn(0);
758 }
759 
760 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
761 {
762   PetscFunctionBegin;
763   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
768 {
769   PetscFunctionBegin;
770   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
771   PetscFunctionReturn(0);
772 }
773 
774 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
775 {
776   PetscFunctionBegin;
777   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
778   PetscFunctionReturn(0);
779 }
780 
781 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
782 {
783   PetscFunctionBegin;
784   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
785   PetscFunctionReturn(0);
786 }
787 
788 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
789 {
790   PetscFunctionBegin;
791   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
792   PetscFunctionReturn(0);
793 }
794 
795 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
796 {
797   PetscFunctionBegin;
798   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
799   PetscFunctionReturn(0);
800 }
801 
802 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
803 {
804   Mat_Product    *product = C->product;
805   Mat            A = product->A,B = product->B;
806 
807   PetscFunctionBegin;
808   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C));
809   PetscFunctionReturn(0);
810 }
811 
812 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
813 {
814   Mat_Product    *product = C->product;
815   Mat            A = product->A,B = product->B;
816   PetscReal      fill = product->fill;
817 
818   PetscFunctionBegin;
819   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C));
820   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
825 {
826   Mat                 Ct;
827   Vec                 zeros;
828   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
829   sparse_matrix_t     csrA, csrP, csrC;
830   PetscBool           set, flag;
831   struct matrix_descr descr_type_sym;
832   PetscObjectState    state;
833 
834   PetscFunctionBegin;
835   PetscCall(MatIsSymmetricKnown(A,&set,&flag));
836   PetscCheck(set && !(set && !flag),PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
837 
838   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
839   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
840   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
841   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
842   csrA = a->csrA;
843   csrP = p->csrA;
844   csrC = c->csrA;
845   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
846   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
847   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
848 
849   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
850   PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
851 
852   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
853    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
854    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
855    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
856    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
857    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
858    * the full matrix. */
859   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
860   PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct));
861   PetscCall(MatCreateVecs(C,&zeros,NULL));
862   PetscCall(VecSetFromOptions(zeros));
863   PetscCall(VecZeroEntries(zeros));
864   PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES));
865   PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN));
866   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
867   PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C));
868   PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
869   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
870   PetscCall(VecDestroy(&zeros));
871   PetscCall(MatDestroy(&Ct));
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
876 {
877   Mat_Product         *product = C->product;
878   Mat                 A = product->A,P = product->B;
879   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
880   sparse_matrix_t     csrA,csrP,csrC;
881   struct matrix_descr descr_type_sym;
882   PetscObjectState    state;
883 
884   PetscFunctionBegin;
885   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
886   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
887   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
888   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
889   csrA = a->csrA;
890   csrP = p->csrA;
891   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
892   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
893   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
894 
895   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
896   if (csrP && csrA) {
897     PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
898   } else {
899     csrC = PETSC_NULL;
900   }
901 
902   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
903    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
904    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
905    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
906    * is guaranteed. */
907   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C));
908 
909   C->ops->productnumeric = MatProductNumeric_PtAP;
910   PetscFunctionReturn(0);
911 }
912 
913 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
914 {
915   PetscFunctionBegin;
916   C->ops->productsymbolic = MatProductSymbolic_AB;
917   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
918   PetscFunctionReturn(0);
919 }
920 
921 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
922 {
923   PetscFunctionBegin;
924   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
925   PetscFunctionReturn(0);
926 }
927 
928 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
929 {
930   PetscFunctionBegin;
931   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
932   C->ops->productsymbolic          = MatProductSymbolic_ABt;
933   PetscFunctionReturn(0);
934 }
935 
936 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
937 {
938   Mat_Product    *product = C->product;
939   Mat            A = product->A;
940   PetscBool      set, flag;
941 
942   PetscFunctionBegin;
943   if (PetscDefined(USE_COMPLEX)) {
944     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
945      * We do this in several other locations in this file. This works for the time being, but these
946      * routines are considered unsafe and may be removed from the MatProduct code in the future.
947      * TODO: Add proper MATSEQAIJMKL implementations */
948     C->ops->productsymbolic = NULL;
949   } else {
950     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
951     PetscCall(MatIsSymmetricKnown(A,&set,&flag));
952     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
953     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
954     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
955      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
961 {
962   PetscFunctionBegin;
963   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
964   PetscFunctionReturn(0);
965 }
966 
967 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
968 {
969   PetscFunctionBegin;
970   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
971   PetscFunctionReturn(0);
972 }
973 
974 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
975 {
976   Mat_Product    *product = C->product;
977 
978   PetscFunctionBegin;
979   switch (product->type) {
980   case MATPRODUCT_AB:
981     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
982     break;
983   case MATPRODUCT_AtB:
984     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
985     break;
986   case MATPRODUCT_ABt:
987     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
988     break;
989   case MATPRODUCT_PtAP:
990     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
991     break;
992   case MATPRODUCT_RARt:
993     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
994     break;
995   case MATPRODUCT_ABC:
996     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
997     break;
998   default:
999     break;
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1004 /* ------------------------ End MatProduct code ------------------------ */
1005 
1006 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1007  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1008  * routine, but can also be used to convert an assembled SeqAIJ matrix
1009  * into a SeqAIJMKL one. */
1010 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1011 {
1012   Mat            B = *newmat;
1013   Mat_SeqAIJMKL  *aijmkl;
1014   PetscBool      set;
1015   PetscBool      sametype;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
1020 
1021   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1022   if (sametype) PetscFunctionReturn(0);
1023 
1024   PetscCall(PetscNewLog(B,&aijmkl));
1025   B->spptr = (void*) aijmkl;
1026 
1027   /* Set function pointers for methods that we inherit from AIJ but override.
1028    * We also parse some command line options below, since those determine some of the methods we point to. */
1029   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1030   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1031   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1032 
1033   aijmkl->sparse_optimized = PETSC_FALSE;
1034 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1035   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1036 #else
1037   aijmkl->no_SpMV2 = PETSC_TRUE;
1038 #endif
1039   aijmkl->eager_inspection = PETSC_FALSE;
1040 
1041   /* Parse command line options. */
1042   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");PetscCall(ierr);
1043   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set));
1044   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set));
1045   ierr = PetscOptionsEnd();PetscCall(ierr);
1046 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1047   if (!aijmkl->no_SpMV2) {
1048     PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1049     aijmkl->no_SpMV2 = PETSC_TRUE;
1050   }
1051 #endif
1052 
1053 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1054   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1055   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1056   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1057   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1058 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1059   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1060   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1061   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1062   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1063   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1064 #   if !defined(PETSC_USE_COMPLEX)
1065   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1066 #   else
1067   B->ops->ptapnumeric             = NULL;
1068 #   endif
1069 # endif
1070 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1071 
1072 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1073   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1074    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1075    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1076    * versions in which the older interface has not been deprecated, we use the old interface. */
1077   if (aijmkl->no_SpMV2) {
1078     B->ops->mult             = MatMult_SeqAIJMKL;
1079     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1080     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1081     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1082   }
1083 #endif
1084 
1085   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ));
1086 
1087   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL));
1088   *newmat = B;
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*@C
1093    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1094    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1095    routines from Intel MKL whenever possible.
1096    If the installed version of MKL supports the "SpMV2" sparse
1097    inspector-executor routines, then those are used by default.
1098    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1099    symmetric A) operations are currently supported.
1100    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1101 
1102    Collective
1103 
1104    Input Parameters:
1105 +  comm - MPI communicator, set to PETSC_COMM_SELF
1106 .  m - number of rows
1107 .  n - number of columns
1108 .  nz - number of nonzeros per row (same for all rows)
1109 -  nnz - array containing the number of nonzeros in the various rows
1110          (possibly different for each row) or NULL
1111 
1112    Output Parameter:
1113 .  A - the matrix
1114 
1115    Options Database Keys:
1116 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1117 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1118 
1119    Notes:
1120    If nnz is given then nz is ignored
1121 
1122    Level: intermediate
1123 
1124 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1125 @*/
1126 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1127 {
1128   PetscFunctionBegin;
1129   PetscCall(MatCreate(comm,A));
1130   PetscCall(MatSetSizes(*A,m,n,m,n));
1131   PetscCall(MatSetType(*A,MATSEQAIJMKL));
1132   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1137 {
1138   PetscFunctionBegin;
1139   PetscCall(MatSetType(A,MATSEQAIJ));
1140   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
1141   PetscFunctionReturn(0);
1142 }
1143