xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   Mat            B       = *newmat;
32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
34 #endif
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
38 
39   /* Reset the original function pointers. */
40   B->ops->duplicate               = MatDuplicate_SeqAIJ;
41   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
42   B->ops->destroy                 = MatDestroy_SeqAIJ;
43   B->ops->mult                    = MatMult_SeqAIJ;
44   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
45   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53 
54   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL));
55 
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58    * simply involves destroying the MKL sparse matrix handle and then freeing
59    * the spptr pointer. */
60   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
61 
62   if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64   PetscCall(PetscFree(B->spptr));
65 
66   /* Change the type of B to MATSEQAIJ. */
67   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
68 
69   *newmat = B;
70   PetscFunctionReturn(0);
71 }
72 
73 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
76 
77   PetscFunctionBegin;
78 
79   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
80   if (aijmkl) {
81     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
82 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
83     if (aijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
85     PetscCall(PetscFree(A->spptr));
86   }
87 
88   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
89    * to destroy everything that remains. */
90   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
91   /* Note that I don't call MatSetType().  I believe this is because that
92    * is only to be called when *building* a matrix.  I could be wrong, but
93    * that is how things work for the SuperLU matrix class. */
94   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaijmkl_seqaij_C",NULL));
95   PetscCall(MatDestroy_SeqAIJ(A));
96   PetscFunctionReturn(0);
97 }
98 
99 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
100  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
101  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
102  * handle, creates a new one, and then calls mkl_sparse_optimize().
103  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
104  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
105  * an unoptimized matrix handle here. */
106 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
107 {
108 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
109   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
110    * does nothing. We make it callable anyway in this case because it cuts
111    * down on littering the code with #ifdefs. */
112   PetscFunctionBegin;
113   PetscFunctionReturn(0);
114 #else
115   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
116   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
117   PetscInt         m,n;
118   MatScalar        *aa;
119   PetscInt         *aj,*ai;
120 
121   PetscFunctionBegin;
122 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
123   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
124    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
125    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
126    * mkl_sparse_optimize() later. */
127   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
128 #endif
129 
130   if (aijmkl->sparse_optimized) {
131     /* Matrix has been previously assembled and optimized. Must destroy old
132      * matrix handle before running the optimization step again. */
133     PetscStackCallStandard(mkl_sparse_destroy,aijmkl->csrA);
134   }
135   aijmkl->sparse_optimized = PETSC_FALSE;
136 
137   /* Now perform the SpMV2 setup and matrix optimization. */
138   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
139   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
140   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
141   m = A->rmap->n;
142   n = A->cmap->n;
143   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
144   aa   = a->a;  /* Nonzero elements stored row-by-row. */
145   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
146   if (a->nz && aa && !A->structure_only) {
147     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
148      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
149     PetscStackCallStandard(mkl_sparse_x_create_csr,&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
150     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
151     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
152     if (!aijmkl->no_SpMV2) {
153       PetscStackCallStandard(mkl_sparse_optimize,aijmkl->csrA);
154     }
155     aijmkl->sparse_optimized = PETSC_TRUE;
156     PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
157   } else {
158     aijmkl->csrA = PETSC_NULL;
159   }
160 
161   PetscFunctionReturn(0);
162 #endif
163 }
164 
165 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
166 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
167 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
168 {
169   sparse_index_base_t indexing;
170   PetscInt            m,n;
171   PetscInt            *aj,*ai,*dummy;
172   MatScalar           *aa;
173   Mat_SeqAIJMKL       *aijmkl;
174 
175   PetscFunctionBegin;
176   if (csrA) {
177     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
178     PetscStackCallStandard(mkl_sparse_x_export_csr,csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
179     PetscCheck((m == nrows) && (n == ncols),PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
180   } else {
181     aj = ai = PETSC_NULL;
182     aa = PETSC_NULL;
183   }
184 
185   PetscCall(MatSetType(A,MATSEQAIJ));
186   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols));
187   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
188    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
189    * they will be destroyed when the MKL handle is destroyed.
190    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
191   if (csrA) {
192     PetscCall(MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL));
193   } else {
194     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
195     PetscCall(MatSetUp(A));
196     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
197     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
198   }
199 
200   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
201    * Now turn it into a MATSEQAIJMKL. */
202   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
203 
204   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
205   aijmkl->csrA = csrA;
206 
207   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
208    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
209    * and just need to be able to run the MKL optimization step. */
210   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
211   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
212   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
213   if (csrA) {
214     PetscStackCallStandard(mkl_sparse_set_mv_hint,aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
215     PetscStackCallStandard(mkl_sparse_set_memory_hint,aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
216   }
217   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
218   PetscFunctionReturn(0);
219 }
220 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
221 
222 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
223  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
224  * MatMatMultNumeric(). */
225 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
226 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
227 {
228   PetscInt            i;
229   PetscInt            nrows,ncols;
230   PetscInt            nz;
231   PetscInt            *ai,*aj,*dummy;
232   PetscScalar         *aa;
233   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
234   sparse_index_base_t indexing;
235 
236   PetscFunctionBegin;
237   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
238   if (!aijmkl->csrA) PetscFunctionReturn(0);
239 
240   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
241   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
242 
243   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
244    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
245   for (i=0; i<nrows; i++) {
246     nz = ai[i+1] - ai[i];
247     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES));
248   }
249 
250   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
251   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
252 
253   PetscCall(PetscObjectStateGet((PetscObject)A,&(aijmkl->state)));
254   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
255    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
256    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
257   aijmkl->sparse_optimized = PETSC_FALSE;
258   PetscFunctionReturn(0);
259 }
260 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
261 
262 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
263 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
264 {
265   PetscInt            i,j,k;
266   PetscInt            nrows,ncols;
267   PetscInt            nz;
268   PetscInt            *ai,*aj,*dummy;
269   PetscScalar         *aa;
270   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
271   sparse_index_base_t indexing;
272 
273   PetscFunctionBegin;
274   PetscCall(PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
275 
276   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
277   if (!aijmkl->csrA) {
278     PetscCall(PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n"));
279     PetscFunctionReturn(0);
280   }
281 
282   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
283   PetscStackCallStandard(mkl_sparse_x_export_csr,aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
284 
285   k = 0;
286   for (i=0; i<nrows; i++) {
287     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ": ",i));
288     nz = ai[i+1] - ai[i];
289     for (j=0; j<nz; j++) {
290       if (aa) {
291         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", %g)  ",aj[k],PetscRealPart(aa[k])));
292       } else {
293         PetscCall(PetscViewerASCIIPrintf(viewer,"(%" PetscInt_FMT ", NULL)",aj[k]));
294       }
295       k++;
296     }
297     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
298   }
299   PetscFunctionReturn(0);
300 }
301 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
302 
303 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
304 {
305   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
306   Mat_SeqAIJMKL  *aijmkl_dest;
307 
308   PetscFunctionBegin;
309   PetscCall(MatDuplicate_SeqAIJ(A,op,M));
310   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
311   PetscCall(PetscArraycpy(aijmkl_dest,aijmkl,1));
312   aijmkl_dest->sparse_optimized = PETSC_FALSE;
313   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
314   PetscFunctionReturn(0);
315 }
316 
317 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
318 {
319   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
320   Mat_SeqAIJMKL   *aijmkl;
321 
322   PetscFunctionBegin;
323   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
324 
325   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
326    * extra information and some different methods, call the AssemblyEnd
327    * routine for a MATSEQAIJ.
328    * I'm not sure if this is the best way to do this, but it avoids
329    * a lot of code duplication. */
330   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
331   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
332 
333   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
334    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
335   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
336   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
337 
338   PetscFunctionReturn(0);
339 }
340 
341 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
342 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
343 {
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
345   const PetscScalar *x;
346   PetscScalar       *y;
347   const MatScalar   *aa;
348   PetscInt          m = A->rmap->n;
349   PetscInt          n = A->cmap->n;
350   PetscScalar       alpha = 1.0;
351   PetscScalar       beta = 0.0;
352   const PetscInt    *aj,*ai;
353   char              matdescra[6];
354 
355   /* Variables not in MatMult_SeqAIJ. */
356   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
357 
358   PetscFunctionBegin;
359   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
360   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
361   PetscCall(VecGetArrayRead(xx,&x));
362   PetscCall(VecGetArray(yy,&y));
363   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
364   aa   = a->a;  /* Nonzero elements stored row-by-row. */
365   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
366 
367   /* Call MKL sparse BLAS routine to do the MatMult. */
368   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
369 
370   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
371   PetscCall(VecRestoreArrayRead(xx,&x));
372   PetscCall(VecRestoreArray(yy,&y));
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
378 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
379 {
380   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
381   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
382   const PetscScalar *x;
383   PetscScalar       *y;
384   PetscObjectState  state;
385 
386   PetscFunctionBegin;
387 
388   /* If there are no nonzero entries, zero yy and return immediately. */
389   if (!a->nz) {
390     PetscCall(VecGetArray(yy,&y));
391     PetscCall(PetscArrayzero(y,A->rmap->n));
392     PetscCall(VecRestoreArray(yy,&y));
393     PetscFunctionReturn(0);
394   }
395 
396   PetscCall(VecGetArrayRead(xx,&x));
397   PetscCall(VecGetArray(yy,&y));
398 
399   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
400    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
401    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
402   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
403   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
404 
405   /* Call MKL SpMV2 executor routine to do the MatMult. */
406   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
407 
408   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
409   PetscCall(VecRestoreArrayRead(xx,&x));
410   PetscCall(VecRestoreArray(yy,&y));
411   PetscFunctionReturn(0);
412 }
413 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
414 
415 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
416 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
417 {
418   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
419   const PetscScalar *x;
420   PetscScalar       *y;
421   const MatScalar   *aa;
422   PetscInt          m = A->rmap->n;
423   PetscInt          n = A->cmap->n;
424   PetscScalar       alpha = 1.0;
425   PetscScalar       beta = 0.0;
426   const PetscInt    *aj,*ai;
427   char              matdescra[6];
428 
429   /* Variables not in MatMultTranspose_SeqAIJ. */
430   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
431 
432   PetscFunctionBegin;
433   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
434   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
435   PetscCall(VecGetArrayRead(xx,&x));
436   PetscCall(VecGetArray(yy,&y));
437   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
438   aa   = a->a;  /* Nonzero elements stored row-by-row. */
439   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
440 
441   /* Call MKL sparse BLAS routine to do the MatMult. */
442   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
443 
444   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
445   PetscCall(VecRestoreArrayRead(xx,&x));
446   PetscCall(VecRestoreArray(yy,&y));
447   PetscFunctionReturn(0);
448 }
449 #endif
450 
451 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
452 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
453 {
454   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
455   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
456   const PetscScalar *x;
457   PetscScalar       *y;
458   PetscObjectState  state;
459 
460   PetscFunctionBegin;
461 
462   /* If there are no nonzero entries, zero yy and return immediately. */
463   if (!a->nz) {
464     PetscCall(VecGetArray(yy,&y));
465     PetscCall(PetscArrayzero(y,A->cmap->n));
466     PetscCall(VecRestoreArray(yy,&y));
467     PetscFunctionReturn(0);
468   }
469 
470   PetscCall(VecGetArrayRead(xx,&x));
471   PetscCall(VecGetArray(yy,&y));
472 
473   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
474    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
475    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
476   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
477   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
478 
479   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
480   PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
481 
482   PetscCall(PetscLogFlops(2.0*a->nz - a->nonzerorowcnt));
483   PetscCall(VecRestoreArrayRead(xx,&x));
484   PetscCall(VecRestoreArray(yy,&y));
485   PetscFunctionReturn(0);
486 }
487 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
488 
489 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
490 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
493   const PetscScalar *x;
494   PetscScalar       *y,*z;
495   const MatScalar   *aa;
496   PetscInt          m = A->rmap->n;
497   PetscInt          n = A->cmap->n;
498   const PetscInt    *aj,*ai;
499   PetscInt          i;
500 
501   /* Variables not in MatMultAdd_SeqAIJ. */
502   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
503   PetscScalar       alpha = 1.0;
504   PetscScalar       beta;
505   char              matdescra[6];
506 
507   PetscFunctionBegin;
508   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
509   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
510 
511   PetscCall(VecGetArrayRead(xx,&x));
512   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
513   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
514   aa   = a->a;  /* Nonzero elements stored row-by-row. */
515   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
516 
517   /* Call MKL sparse BLAS routine to do the MatMult. */
518   if (zz == yy) {
519     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
520     beta = 1.0;
521     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
522   } else {
523     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
524      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
525     beta = 0.0;
526     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
527     for (i=0; i<m; i++) {
528       z[i] += y[i];
529     }
530   }
531 
532   PetscCall(PetscLogFlops(2.0*a->nz));
533   PetscCall(VecRestoreArrayRead(xx,&x));
534   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
535   PetscFunctionReturn(0);
536 }
537 #endif
538 
539 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
540 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
541 {
542   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
543   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
544   const PetscScalar *x;
545   PetscScalar       *y,*z;
546   PetscInt          m = A->rmap->n;
547   PetscInt          i;
548 
549   /* Variables not in MatMultAdd_SeqAIJ. */
550   PetscObjectState  state;
551 
552   PetscFunctionBegin;
553 
554   /* If there are no nonzero entries, set zz = yy and return immediately. */
555   if (!a->nz) {
556     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
557     PetscCall(PetscArraycpy(z,y,m));
558     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
559     PetscFunctionReturn(0);
560   }
561 
562   PetscCall(VecGetArrayRead(xx,&x));
563   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
564 
565   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
566    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
567    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
568   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
569   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
570 
571   /* Call MKL sparse BLAS routine to do the MatMult. */
572   if (zz == yy) {
573     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
574      * with alpha and beta both set to 1.0. */
575     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
576   } else {
577     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
578      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
579     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
580     for (i=0; i<m; i++) z[i] += y[i];
581   }
582 
583   PetscCall(PetscLogFlops(2.0*a->nz));
584   PetscCall(VecRestoreArrayRead(xx,&x));
585   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
586   PetscFunctionReturn(0);
587 }
588 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
589 
590 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
591 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
594   const PetscScalar *x;
595   PetscScalar       *y,*z;
596   const MatScalar   *aa;
597   PetscInt          m = A->rmap->n;
598   PetscInt          n = A->cmap->n;
599   const PetscInt    *aj,*ai;
600   PetscInt          i;
601 
602   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
603   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
604   PetscScalar       alpha = 1.0;
605   PetscScalar       beta;
606   char              matdescra[6];
607 
608   PetscFunctionBegin;
609   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
610   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
611 
612   PetscCall(VecGetArrayRead(xx,&x));
613   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
614   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
615   aa   = a->a;  /* Nonzero elements stored row-by-row. */
616   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
617 
618   /* Call MKL sparse BLAS routine to do the MatMult. */
619   if (zz == yy) {
620     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
621     beta = 1.0;
622     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
623   } else {
624     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
625      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
626     beta = 0.0;
627     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
628     for (i=0; i<n; i++) {
629       z[i] += y[i];
630     }
631   }
632 
633   PetscCall(PetscLogFlops(2.0*a->nz));
634   PetscCall(VecRestoreArrayRead(xx,&x));
635   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
636   PetscFunctionReturn(0);
637 }
638 #endif
639 
640 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
641 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
642 {
643   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
644   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
645   const PetscScalar *x;
646   PetscScalar       *y,*z;
647   PetscInt          n = A->cmap->n;
648   PetscInt          i;
649   PetscObjectState  state;
650 
651   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
652 
653   PetscFunctionBegin;
654 
655   /* If there are no nonzero entries, set zz = yy and return immediately. */
656   if (!a->nz) {
657     PetscCall(VecGetArrayPair(yy,zz,&y,&z));
658     PetscCall(PetscArraycpy(z,y,n));
659     PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
660     PetscFunctionReturn(0);
661   }
662 
663   PetscCall(VecGetArrayRead(xx,&x));
664   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
665 
666   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
667    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
668    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
669   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
670   if (!aijmkl->sparse_optimized || aijmkl->state != state) MatSeqAIJMKL_create_mkl_handle(A);
671 
672   /* Call MKL sparse BLAS routine to do the MatMult. */
673   if (zz == yy) {
674     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
675      * with alpha and beta both set to 1.0. */
676     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
677   } else {
678     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
679      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
680     PetscStackCallStandard(mkl_sparse_x_mv,SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
681     for (i=0; i<n; i++) z[i] += y[i];
682   }
683 
684   PetscCall(PetscLogFlops(2.0*a->nz));
685   PetscCall(VecRestoreArrayRead(xx,&x));
686   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
687   PetscFunctionReturn(0);
688 }
689 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
690 
691 /* -------------------------- MatProduct code -------------------------- */
692 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
693 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
694 {
695   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
696   sparse_matrix_t     csrA,csrB,csrC;
697   PetscInt            nrows,ncols;
698   struct matrix_descr descr_type_gen;
699   PetscObjectState    state;
700 
701   PetscFunctionBegin;
702   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
703    * not handle sparse matrices with zero rows or columns. */
704   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
705   else nrows = A->cmap->N;
706   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
707   else ncols = B->rmap->N;
708 
709   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
710   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
711   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
712   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
713   csrA = a->csrA;
714   csrB = b->csrA;
715   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
716 
717   if (csrA && csrB) {
718     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
719   } else {
720     csrC = PETSC_NULL;
721   }
722 
723   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C));
724 
725   PetscFunctionReturn(0);
726 }
727 
728 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
729 {
730   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
731   sparse_matrix_t     csrA, csrB, csrC;
732   struct matrix_descr descr_type_gen;
733   PetscObjectState    state;
734 
735   PetscFunctionBegin;
736   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
737   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
738   PetscCall(PetscObjectStateGet((PetscObject)B,&state));
739   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
740   csrA = a->csrA;
741   csrB = b->csrA;
742   csrC = c->csrA;
743   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
744 
745   if (csrA && csrB) {
746     PetscStackCallStandard(mkl_sparse_sp2m,transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
747   } else {
748     csrC = PETSC_NULL;
749   }
750 
751   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
752   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
753 
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
758 {
759   PetscFunctionBegin;
760   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
761   PetscFunctionReturn(0);
762 }
763 
764 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
765 {
766   PetscFunctionBegin;
767   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
768   PetscFunctionReturn(0);
769 }
770 
771 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
772 {
773   PetscFunctionBegin;
774   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
775   PetscFunctionReturn(0);
776 }
777 
778 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
779 {
780   PetscFunctionBegin;
781   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C));
782   PetscFunctionReturn(0);
783 }
784 
785 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
786 {
787   PetscFunctionBegin;
788   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
789   PetscFunctionReturn(0);
790 }
791 
792 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
793 {
794   PetscFunctionBegin;
795   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C));
796   PetscFunctionReturn(0);
797 }
798 
799 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
800 {
801   Mat_Product    *product = C->product;
802   Mat            A = product->A,B = product->B;
803 
804   PetscFunctionBegin;
805   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C));
806   PetscFunctionReturn(0);
807 }
808 
809 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
810 {
811   Mat_Product    *product = C->product;
812   Mat            A = product->A,B = product->B;
813   PetscReal      fill = product->fill;
814 
815   PetscFunctionBegin;
816   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C));
817   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
818   PetscFunctionReturn(0);
819 }
820 
821 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
822 {
823   Mat                 Ct;
824   Vec                 zeros;
825   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
826   sparse_matrix_t     csrA, csrP, csrC;
827   PetscBool           set, flag;
828   struct matrix_descr descr_type_sym;
829   PetscObjectState    state;
830 
831   PetscFunctionBegin;
832   PetscCall(MatIsSymmetricKnown(A,&set,&flag));
833   PetscCheck(set && flag,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
834 
835   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
836   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
837   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
838   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
839   csrA = a->csrA;
840   csrP = p->csrA;
841   csrC = c->csrA;
842   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
843   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
844   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
845 
846   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
847   PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
848 
849   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
850    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
851    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
852    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
853    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
854    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
855    * the full matrix. */
856   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
857   PetscCall(MatTranspose(C,MAT_INITIAL_MATRIX,&Ct));
858   PetscCall(MatCreateVecs(C,&zeros,NULL));
859   PetscCall(VecSetFromOptions(zeros));
860   PetscCall(VecZeroEntries(zeros));
861   PetscCall(MatDiagonalSet(Ct,zeros,INSERT_VALUES));
862   PetscCall(MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN));
863   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
864   PetscCall(MatProductCreateWithMat(A,P,PETSC_NULL,C));
865   PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
866   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
867   PetscCall(VecDestroy(&zeros));
868   PetscCall(MatDestroy(&Ct));
869   PetscFunctionReturn(0);
870 }
871 
872 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
873 {
874   Mat_Product         *product = C->product;
875   Mat                 A = product->A,P = product->B;
876   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
877   sparse_matrix_t     csrA,csrP,csrC;
878   struct matrix_descr descr_type_sym;
879   PetscObjectState    state;
880 
881   PetscFunctionBegin;
882   PetscCall(PetscObjectStateGet((PetscObject)A,&state));
883   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
884   PetscCall(PetscObjectStateGet((PetscObject)P,&state));
885   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
886   csrA = a->csrA;
887   csrP = p->csrA;
888   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
889   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
890   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
891 
892   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
893   if (csrP && csrA) {
894     PetscStackCallStandard(mkl_sparse_sypr,SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
895   } else {
896     csrC = PETSC_NULL;
897   }
898 
899   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
900    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
901    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
902    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
903    * is guaranteed. */
904   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C));
905 
906   C->ops->productnumeric = MatProductNumeric_PtAP;
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
911 {
912   PetscFunctionBegin;
913   C->ops->productsymbolic = MatProductSymbolic_AB;
914   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
915   PetscFunctionReturn(0);
916 }
917 
918 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
919 {
920   PetscFunctionBegin;
921   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
922   PetscFunctionReturn(0);
923 }
924 
925 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
926 {
927   PetscFunctionBegin;
928   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
929   C->ops->productsymbolic          = MatProductSymbolic_ABt;
930   PetscFunctionReturn(0);
931 }
932 
933 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
934 {
935   Mat_Product    *product = C->product;
936   Mat            A = product->A;
937   PetscBool      set, flag;
938 
939   PetscFunctionBegin;
940   if (PetscDefined(USE_COMPLEX)) {
941     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
942      * We do this in several other locations in this file. This works for the time being, but these
943      * routines are considered unsafe and may be removed from the MatProduct code in the future.
944      * TODO: Add proper MATSEQAIJMKL implementations */
945     C->ops->productsymbolic = NULL;
946   } else {
947     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
948     PetscCall(MatIsSymmetricKnown(A,&set,&flag));
949     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
950     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
951     /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
952      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
953   }
954   PetscFunctionReturn(0);
955 }
956 
957 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
958 {
959   PetscFunctionBegin;
960   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
961   PetscFunctionReturn(0);
962 }
963 
964 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
965 {
966   PetscFunctionBegin;
967   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
972 {
973   Mat_Product    *product = C->product;
974 
975   PetscFunctionBegin;
976   switch (product->type) {
977   case MATPRODUCT_AB:
978     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
979     break;
980   case MATPRODUCT_AtB:
981     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
982     break;
983   case MATPRODUCT_ABt:
984     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
985     break;
986   case MATPRODUCT_PtAP:
987     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
988     break;
989   case MATPRODUCT_RARt:
990     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
991     break;
992   case MATPRODUCT_ABC:
993     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
994     break;
995   default:
996     break;
997   }
998   PetscFunctionReturn(0);
999 }
1000 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1001 /* ------------------------ End MatProduct code ------------------------ */
1002 
1003 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1004  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1005  * routine, but can also be used to convert an assembled SeqAIJ matrix
1006  * into a SeqAIJMKL one. */
1007 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1008 {
1009   Mat            B = *newmat;
1010   Mat_SeqAIJMKL  *aijmkl;
1011   PetscBool      set;
1012   PetscBool      sametype;
1013 
1014   PetscFunctionBegin;
1015   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
1016 
1017   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
1018   if (sametype) PetscFunctionReturn(0);
1019 
1020   PetscCall(PetscNewLog(B,&aijmkl));
1021   B->spptr = (void*) aijmkl;
1022 
1023   /* Set function pointers for methods that we inherit from AIJ but override.
1024    * We also parse some command line options below, since those determine some of the methods we point to. */
1025   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1026   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1027   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1028 
1029   aijmkl->sparse_optimized = PETSC_FALSE;
1030 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1031   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1032 #else
1033   aijmkl->no_SpMV2 = PETSC_TRUE;
1034 #endif
1035   aijmkl->eager_inspection = PETSC_FALSE;
1036 
1037   /* Parse command line options. */
1038   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
1039   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set));
1040   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set));
1041   PetscOptionsEnd();
1042 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1043   if (!aijmkl->no_SpMV2) {
1044     PetscCall(PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1045     aijmkl->no_SpMV2 = PETSC_TRUE;
1046   }
1047 #endif
1048 
1049 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1050   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1051   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1052   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1053   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1054 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1055   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1056   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1057   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1058   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1059   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1060 #   if !defined(PETSC_USE_COMPLEX)
1061   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1062 #   else
1063   B->ops->ptapnumeric             = NULL;
1064 #   endif
1065 # endif
1066 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1067 
1068 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1069   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1070    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1071    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1072    * versions in which the older interface has not been deprecated, we use the old interface. */
1073   if (aijmkl->no_SpMV2) {
1074     B->ops->mult             = MatMult_SeqAIJMKL;
1075     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1076     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1077     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1078   }
1079 #endif
1080 
1081   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ));
1082 
1083   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL));
1084   *newmat = B;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*@C
1089    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1090    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1091    routines from Intel MKL whenever possible.
1092    If the installed version of MKL supports the "SpMV2" sparse
1093    inspector-executor routines, then those are used by default.
1094    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1095    symmetric A) operations are currently supported.
1096    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1097 
1098    Collective
1099 
1100    Input Parameters:
1101 +  comm - MPI communicator, set to PETSC_COMM_SELF
1102 .  m - number of rows
1103 .  n - number of columns
1104 .  nz - number of nonzeros per row (same for all rows)
1105 -  nnz - array containing the number of nonzeros in the various rows
1106          (possibly different for each row) or NULL
1107 
1108    Output Parameter:
1109 .  A - the matrix
1110 
1111    Options Database Keys:
1112 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1113 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1114 
1115    Notes:
1116    If nnz is given then nz is ignored
1117 
1118    Level: intermediate
1119 
1120 .seealso: `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1121 @*/
1122 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1123 {
1124   PetscFunctionBegin;
1125   PetscCall(MatCreate(comm,A));
1126   PetscCall(MatSetSizes(*A,m,n,m,n));
1127   PetscCall(MatSetType(*A,MATSEQAIJMKL));
1128   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz));
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1133 {
1134   PetscFunctionBegin;
1135   PetscCall(MatSetType(A,MATSEQAIJ));
1136   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A));
1137   PetscFunctionReturn(0);
1138 }
1139