xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision dea3b16571b14eaaa2d0f681373289b7632821c9)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   if (!aijmkl->no_SpMV2) {
61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70 
71   if (aijmkl->sparse_optimized) {
72     sparse_status_t stat;
73     stat = mkl_sparse_destroy(aijmkl->csrA);
74     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
75   }
76 #endif
77   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
78 
79   /* Change the type of B to MATSEQAIJ. */
80   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
81 
82   *newmat = B;
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
87 {
88   PetscErrorCode ierr;
89   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
90 
91   PetscFunctionBegin;
92 
93   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
94    * spptr pointer. */
95   if (aijmkl) {
96     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
98     if (aijmkl->sparse_optimized) {
99       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
100       stat = mkl_sparse_destroy(aijmkl->csrA);
101       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
102     }
103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
104     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
105   }
106 
107   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
108    * to destroy everything that remains. */
109   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
110   /* Note that I don't call MatSetType().  I believe this is because that
111    * is only to be called when *building* a matrix.  I could be wrong, but
112    * that is how things work for the SuperLU matrix class. */
113   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
118  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
119  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
120  * handle, creates a new one, and then calls mkl_sparse_optimize().
121  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
122  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
123  * an unoptimized matrix handle here. */
124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
125 {
126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
127   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
128    * does nothing. We make it callable anyway in this case because it cuts
129    * down on littering the code with #ifdefs. */
130   PetscFunctionBegin;
131   PetscFunctionReturn(0);
132 #else
133   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
134   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
135   PetscInt         m,n;
136   MatScalar        *aa;
137   PetscInt         *aj,*ai;
138   sparse_status_t  stat;
139   PetscErrorCode   ierr;
140 
141   PetscFunctionBegin;
142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
143   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
144    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
145    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
146    * mkl_sparse_optimize() later. */
147   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
148 #endif
149 
150   if (aijmkl->sparse_optimized) {
151     /* Matrix has been previously assembled and optimized. Must destroy old
152      * matrix handle before running the optimization step again. */
153     stat = mkl_sparse_destroy(aijmkl->csrA);
154     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if ((a->nz!=0) && aa && !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
172     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
173     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
174     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
176     if (!aijmkl->no_SpMV2) {
177       stat = mkl_sparse_optimize(aijmkl->csrA);
178       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
179     }
180     aijmkl->sparse_optimized = PETSC_TRUE;
181     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
182   } else {
183     aijmkl->csrA = PETSC_NULL;
184   }
185 
186   PetscFunctionReturn(0);
187 #endif
188 }
189 
190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
193 {
194   PetscErrorCode      ierr;
195   sparse_status_t     stat;
196   sparse_index_base_t indexing;
197   PetscInt            m,n;
198   PetscInt            *aj,*ai,*dummy;
199   MatScalar           *aa;
200   Mat_SeqAIJMKL       *aijmkl;
201 
202   if (csrA) {
203     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
205     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
207   } else {
208     aj = ai = PETSC_NULL;
209     aa = PETSC_NULL;
210   }
211 
212   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
213   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216    * they will be destroyed when the MKL handle is destroyed.
217    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218   if (csrA) {
219     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
220   } else {
221     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
222     ierr = MatSetUp(A);CHKERRQ(ierr);
223     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   }
226 
227   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
228    * Now turn it into a MATSEQAIJMKL. */
229   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
230 
231   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
232   aijmkl->csrA = csrA;
233 
234   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
235    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
236    * and just need to be able to run the MKL optimization step. */
237   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
238   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
239   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
240   if (csrA) {
241     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
242     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
243     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
244     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
245   }
246   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
247 
248   PetscFunctionReturn(0);
249 }
250 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
251 
252 
253 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
254  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
255  * MatMatMultNumeric(). */
256 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
257 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
258 {
259   PetscInt            i;
260   PetscInt            nrows,ncols;
261   PetscInt            nz;
262   PetscInt            *ai,*aj,*dummy;
263   PetscScalar         *aa;
264   PetscErrorCode      ierr;
265   Mat_SeqAIJMKL       *aijmkl;
266   sparse_status_t     stat;
267   sparse_index_base_t indexing;
268 
269   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
270 
271   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
272   if (!aijmkl->csrA) PetscFunctionReturn(0);
273 
274   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
276   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
277 
278   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
279    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
280   for (i=0; i<nrows; i++) {
281     nz = ai[i+1] - ai[i];
282     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
283   }
284 
285   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
286   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287 
288   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
289   /* We mark our matrix as having a valid, optimized MKL handle.
290    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
291   aijmkl->sparse_optimized = PETSC_TRUE;
292 
293   PetscFunctionReturn(0);
294 }
295 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
296 
297 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
298 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
299 {
300   PetscInt            i,j,k;
301   PetscInt            nrows,ncols;
302   PetscInt            nz;
303   PetscInt            *ai,*aj,*dummy;
304   PetscScalar         *aa;
305   PetscErrorCode      ierr;
306   Mat_SeqAIJMKL       *aijmkl;
307   sparse_status_t     stat;
308   sparse_index_base_t indexing;
309 
310   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
311 
312   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
313 
314   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
315   if (!aijmkl->csrA) {
316     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
317     PetscFunctionReturn(0);
318   }
319 
320   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
321   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
322   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
323 
324   k = 0;
325   for (i=0; i<nrows; i++) {
326     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
327     nz = ai[i+1] - ai[i];
328     for (j=0; j<nz; j++) {
329       if (aa) {
330         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],aa[k]);CHKERRQ(ierr);
331       } else {
332         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
333       }
334       k++;
335     }
336     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
337   }
338   PetscFunctionReturn(0);
339 }
340 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
341 
342 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
343 {
344   PetscErrorCode ierr;
345   Mat_SeqAIJMKL  *aijmkl;
346   Mat_SeqAIJMKL  *aijmkl_dest;
347 
348   PetscFunctionBegin;
349   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
350   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
351   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
352   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
353   aijmkl_dest->sparse_optimized = PETSC_FALSE;
354   if (aijmkl->eager_inspection) {
355     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
361 {
362   PetscErrorCode  ierr;
363   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
364   Mat_SeqAIJMKL   *aijmkl;
365 
366   PetscFunctionBegin;
367   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
368 
369   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
370    * extra information and some different methods, call the AssemblyEnd
371    * routine for a MATSEQAIJ.
372    * I'm not sure if this is the best way to do this, but it avoids
373    * a lot of code duplication. */
374   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
375   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
376 
377   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
378    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
379   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
380   if (aijmkl->eager_inspection) {
381     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
382   }
383 
384   PetscFunctionReturn(0);
385 }
386 
387 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
388 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
389 {
390   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
391   const PetscScalar *x;
392   PetscScalar       *y;
393   const MatScalar   *aa;
394   PetscErrorCode    ierr;
395   PetscInt          m = A->rmap->n;
396   PetscInt          n = A->cmap->n;
397   PetscScalar       alpha = 1.0;
398   PetscScalar       beta = 0.0;
399   const PetscInt    *aj,*ai;
400   char              matdescra[6];
401 
402 
403   /* Variables not in MatMult_SeqAIJ. */
404   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
405 
406   PetscFunctionBegin;
407   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
408   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
409   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
410   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
411   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
412   aa   = a->a;  /* Nonzero elements stored row-by-row. */
413   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
414 
415   /* Call MKL sparse BLAS routine to do the MatMult. */
416   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
417 
418   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
419   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 #endif
424 
425 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
426 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
427 {
428   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
429   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
430   const PetscScalar *x;
431   PetscScalar       *y;
432   PetscErrorCode    ierr;
433   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
434   PetscObjectState  state;
435 
436   PetscFunctionBegin;
437 
438   /* If there are no nonzero entries, zero yy and return immediately. */
439   if(!a->nz) {
440     PetscInt i;
441     PetscInt m=A->rmap->n;
442     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
443     for (i=0; i<m; i++) {
444       y[i] = 0.0;
445     }
446     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
447     PetscFunctionReturn(0);
448   }
449 
450   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
451   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
452 
453   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
454    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
455    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
456   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
457   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
458     MatSeqAIJMKL_create_mkl_handle(A);
459   }
460 
461   /* Call MKL SpMV2 executor routine to do the MatMult. */
462   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
463   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
464 
465   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
466   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
467   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
471 
472 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
473 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
474 {
475   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
476   const PetscScalar *x;
477   PetscScalar       *y;
478   const MatScalar   *aa;
479   PetscErrorCode    ierr;
480   PetscInt          m = A->rmap->n;
481   PetscInt          n = A->cmap->n;
482   PetscScalar       alpha = 1.0;
483   PetscScalar       beta = 0.0;
484   const PetscInt    *aj,*ai;
485   char              matdescra[6];
486 
487   /* Variables not in MatMultTranspose_SeqAIJ. */
488   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
489 
490   PetscFunctionBegin;
491   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
492   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
493   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
494   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
495   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
496   aa   = a->a;  /* Nonzero elements stored row-by-row. */
497   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
498 
499   /* Call MKL sparse BLAS routine to do the MatMult. */
500   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
501 
502   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
503   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
504   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 #endif
508 
509 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
510 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
511 {
512   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
513   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
514   const PetscScalar *x;
515   PetscScalar       *y;
516   PetscErrorCode    ierr;
517   sparse_status_t   stat;
518   PetscObjectState  state;
519 
520   PetscFunctionBegin;
521 
522   /* If there are no nonzero entries, zero yy and return immediately. */
523   if(!a->nz) {
524     PetscInt i;
525     PetscInt n=A->cmap->n;
526     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
527     for (i=0; i<n; i++) {
528       y[i] = 0.0;
529     }
530     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
531     PetscFunctionReturn(0);
532   }
533 
534   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
535   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
536 
537   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
538    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
539    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
540   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
541   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
542     MatSeqAIJMKL_create_mkl_handle(A);
543   }
544 
545   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
546   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
547   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
548 
549   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
550   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
551   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
555 
556 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
557 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
558 {
559   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
560   const PetscScalar *x;
561   PetscScalar       *y,*z;
562   const MatScalar   *aa;
563   PetscErrorCode    ierr;
564   PetscInt          m = A->rmap->n;
565   PetscInt          n = A->cmap->n;
566   const PetscInt    *aj,*ai;
567   PetscInt          i;
568 
569   /* Variables not in MatMultAdd_SeqAIJ. */
570   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
571   PetscScalar       alpha = 1.0;
572   PetscScalar       beta;
573   char              matdescra[6];
574 
575   PetscFunctionBegin;
576   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
577   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
578 
579   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
580   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
581   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
582   aa   = a->a;  /* Nonzero elements stored row-by-row. */
583   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
584 
585   /* Call MKL sparse BLAS routine to do the MatMult. */
586   if (zz == yy) {
587     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
588     beta = 1.0;
589     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
590   } else {
591     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
592      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
593     beta = 0.0;
594     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
595     for (i=0; i<m; i++) {
596       z[i] += y[i];
597     }
598   }
599 
600   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
602   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 #endif
606 
607 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
608 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
609 {
610   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
611   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
612   const PetscScalar *x;
613   PetscScalar       *y,*z;
614   PetscErrorCode    ierr;
615   PetscInt          m = A->rmap->n;
616   PetscInt          i;
617 
618   /* Variables not in MatMultAdd_SeqAIJ. */
619   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
620   PetscObjectState  state;
621 
622   PetscFunctionBegin;
623 
624   /* If there are no nonzero entries, set zz = yy and return immediately. */
625   if(!a->nz) {
626     PetscInt i;
627     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
628     for (i=0; i<m; i++) {
629       z[i] = y[i];
630     }
631     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
632     PetscFunctionReturn(0);
633   }
634 
635   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
636   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
637 
638   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
639    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
640    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
641   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
642   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
643     MatSeqAIJMKL_create_mkl_handle(A);
644   }
645 
646   /* Call MKL sparse BLAS routine to do the MatMult. */
647   if (zz == yy) {
648     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
649      * with alpha and beta both set to 1.0. */
650     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
651     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
652   } else {
653     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
654      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
655     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
656     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
657     for (i=0; i<m; i++) {
658       z[i] += y[i];
659     }
660   }
661 
662   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
663   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
664   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
668 
669 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
670 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
671 {
672   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
673   const PetscScalar *x;
674   PetscScalar       *y,*z;
675   const MatScalar   *aa;
676   PetscErrorCode    ierr;
677   PetscInt          m = A->rmap->n;
678   PetscInt          n = A->cmap->n;
679   const PetscInt    *aj,*ai;
680   PetscInt          i;
681 
682   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
683   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
684   PetscScalar       alpha = 1.0;
685   PetscScalar       beta;
686   char              matdescra[6];
687 
688   PetscFunctionBegin;
689   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
690   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
691 
692   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
693   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
694   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
695   aa   = a->a;  /* Nonzero elements stored row-by-row. */
696   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
697 
698   /* Call MKL sparse BLAS routine to do the MatMult. */
699   if (zz == yy) {
700     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
701     beta = 1.0;
702     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
703   } else {
704     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
705      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
706     beta = 0.0;
707     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
708     for (i=0; i<n; i++) {
709       z[i] += y[i];
710     }
711   }
712 
713   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
714   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
715   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 #endif
719 
720 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
721 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
722 {
723   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
724   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
725   const PetscScalar *x;
726   PetscScalar       *y,*z;
727   PetscErrorCode    ierr;
728   PetscInt          n = A->cmap->n;
729   PetscInt          i;
730   PetscObjectState  state;
731 
732   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
733   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
734 
735   PetscFunctionBegin;
736 
737   /* If there are no nonzero entries, set zz = yy and return immediately. */
738   if(!a->nz) {
739     PetscInt i;
740     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
741     for (i=0; i<n; i++) {
742       z[i] = y[i];
743     }
744     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
745     PetscFunctionReturn(0);
746   }
747 
748   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
749   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
750 
751   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
752    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
753    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
754   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
755   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
756     MatSeqAIJMKL_create_mkl_handle(A);
757   }
758 
759   /* Call MKL sparse BLAS routine to do the MatMult. */
760   if (zz == yy) {
761     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
762      * with alpha and beta both set to 1.0. */
763     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
764     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
765   } else {
766     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
767      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
768     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
769     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
770     for (i=0; i<n; i++) {
771       z[i] += y[i];
772     }
773   }
774 
775   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
776   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
777   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
781 
782 /* -------------------------- MatProduct code -------------------------- */
783 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
784 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
785 {
786   Mat_SeqAIJMKL       *a, *b;
787   sparse_matrix_t     csrA, csrB, csrC;
788   PetscInt            nrows,ncols;
789   PetscErrorCode      ierr;
790   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
791   struct matrix_descr descr_type_gen;
792   PetscObjectState    state;
793 
794   PetscFunctionBegin;
795   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
796    * not handle sparse matrices with zero rows or columns. */
797   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
798   else nrows = A->cmap->N;
799   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
800   else ncols = B->rmap->N;
801 
802   a = (Mat_SeqAIJMKL*)A->spptr;
803   b = (Mat_SeqAIJMKL*)B->spptr;
804   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
805   if (!a->sparse_optimized || a->state != state) {
806     MatSeqAIJMKL_create_mkl_handle(A);
807   }
808   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
809   if (!b->sparse_optimized || b->state != state) {
810     MatSeqAIJMKL_create_mkl_handle(B);
811   }
812   csrA = a->csrA;
813   csrB = b->csrA;
814   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
815 
816   if (csrA && csrB) {
817     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
818     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply");
819   } else {
820     csrC = PETSC_NULL;
821   }
822 
823   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 
828 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
829 {
830   Mat_SeqAIJMKL       *a, *b, *c;
831   sparse_matrix_t     csrA, csrB, csrC;
832   PetscErrorCode      ierr;
833   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
834   struct matrix_descr descr_type_gen;
835   PetscObjectState    state;
836 
837   PetscFunctionBegin;
838   a = (Mat_SeqAIJMKL*)A->spptr;
839   b = (Mat_SeqAIJMKL*)B->spptr;
840   c = (Mat_SeqAIJMKL*)C->spptr;
841   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
842   if (!a->sparse_optimized || a->state != state) {
843     MatSeqAIJMKL_create_mkl_handle(A);
844   }
845   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
846   if (!b->sparse_optimized || b->state != state) {
847     MatSeqAIJMKL_create_mkl_handle(B);
848   }
849   csrA = a->csrA;
850   csrB = b->csrA;
851   csrC = c->csrA;
852   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
853 
854   if (csrA && csrB) {
855     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
856     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
857   } else {
858     csrC = PETSC_NULL;
859   }
860 
861   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
862   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
863 
864   PetscFunctionReturn(0);
865 }
866 
867 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
868 {
869   PetscErrorCode ierr;
870 
871   PetscFunctionBegin;
872   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
877 {
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
882   PetscFunctionReturn(0);
883 }
884 
885 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
886 {
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
891   PetscFunctionReturn(0);
892 }
893 
894 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
895 {
896   PetscErrorCode ierr;
897 
898   PetscFunctionBegin;
899   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
900   PetscFunctionReturn(0);
901 }
902 
903 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
904 {
905   PetscErrorCode ierr;
906 
907   PetscFunctionBegin;
908   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
913 {
914   PetscErrorCode ierr;
915 
916   PetscFunctionBegin;
917   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
922 {
923   PetscErrorCode ierr;
924   Mat_Product    *product = C->product;
925   Mat            A = product->A,B = product->B;
926 
927   PetscFunctionBegin;
928   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
932 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
933 {
934   PetscErrorCode ierr;
935   Mat_Product    *product = C->product;
936   Mat            A = product->A,B = product->B;
937   PetscReal      fill = product->fill;
938 
939   PetscFunctionBegin;
940   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
941 
942   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat P,Mat C)
947 {
948   Mat                 Ct;
949   Vec                 zeros;
950   Mat_SeqAIJMKL       *a, *p, *c;
951   sparse_matrix_t     csrA, csrP, csrC;
952   PetscBool           set, flag;
953   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
954   struct matrix_descr descr_type_sym;
955   PetscObjectState    state;
956   PetscErrorCode      ierr;
957 
958   PetscFunctionBegin;
959   ierr = MatIsSymmetricKnown(A,&set,&flag);
960   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL() called on matrix A not marked as symmetric");
961 
962   a = (Mat_SeqAIJMKL*)A->spptr;
963   p = (Mat_SeqAIJMKL*)P->spptr;
964   c = (Mat_SeqAIJMKL*)C->spptr;
965   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
966   if (!a->sparse_optimized || a->state != state) {
967     MatSeqAIJMKL_create_mkl_handle(A);
968   }
969   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
970   if (!p->sparse_optimized || p->state != state) {
971     MatSeqAIJMKL_create_mkl_handle(P);
972   }
973   csrA = a->csrA;
974   csrP = p->csrA;
975   csrC = c->csrA;
976   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
977   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
978   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
979 
980   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
981   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
982   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
983 
984   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
985    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
986    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
987    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
988    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
989    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
990    * the full matrix. */
991   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
992   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
993   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
994   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
995   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
996   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
997   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
998   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
999   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
1000   C->product->type = MATPRODUCT_PtAP;
1001   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
1002   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
1003   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
1004 
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
1009 {
1010   Mat_Product         *product = C->product;
1011   Mat                 A = product->A,P = product->B;
1012   Mat_SeqAIJMKL       *a,*p;
1013   sparse_matrix_t     csrA,csrP,csrC;
1014   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
1015   struct matrix_descr descr_type_sym;
1016   PetscObjectState    state;
1017   PetscErrorCode      ierr;
1018 
1019   PetscFunctionBegin;
1020   a = (Mat_SeqAIJMKL*)A->spptr;
1021   p = (Mat_SeqAIJMKL*)P->spptr;
1022   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
1023   if (!a->sparse_optimized || a->state != state) {
1024     MatSeqAIJMKL_create_mkl_handle(A);
1025   }
1026   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1027   if (!p->sparse_optimized || p->state != state) {
1028     MatSeqAIJMKL_create_mkl_handle(P);
1029   }
1030   csrA = a->csrA;
1031   csrP = p->csrA;
1032   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1033   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1034   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1035 
1036   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1037   if (csrP && csrA) {
1038     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1039     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr");
1040   } else {
1041     csrC = PETSC_NULL;
1042   }
1043 
1044   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1045    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1046    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(). I believe that leaving things
1047    * in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this is
1048    * guaranteed. */
1049   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1050 
1051   C->ops->productnumeric = MatProductNumeric_PtAP;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1056 {
1057   PetscFunctionBegin;
1058   C->ops->productsymbolic = MatProductSymbolic_AB;
1059   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1064 {
1065   PetscFunctionBegin;
1066   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1071 {
1072   PetscFunctionBegin;
1073   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1074   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1079 {
1080   PetscErrorCode ierr;
1081   Mat_Product    *product = C->product;
1082   Mat            A = product->A;
1083   PetscBool      set, flag;
1084 
1085   PetscFunctionBegin;
1086 #if defined(PETSC_USE_COMPLEX)
1087   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1088    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1089    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1090    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1091   C->ops->productsymbolic = NULL;
1092 #else
1093   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1094   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1095   if (set && flag) {
1096     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1097     PetscFunctionReturn(0);
1098   } else {
1099     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1100   }
1101   /* Note that we don't set C->ops->productnumeric here, as this has must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL(),
1102    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1103 #endif
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1108 {
1109   PetscFunctionBegin;
1110   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1115 {
1116   PetscFunctionBegin;
1117   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Product    *product = C->product;
1125 
1126   PetscFunctionBegin;
1127   switch (product->type) {
1128   case MATPRODUCT_AB:
1129     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1130     break;
1131   case MATPRODUCT_AtB:
1132     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1133     break;
1134   case MATPRODUCT_ABt:
1135     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1136     break;
1137   case MATPRODUCT_PtAP:
1138     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1139     break;
1140   case MATPRODUCT_RARt:
1141     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1142     break;
1143   case MATPRODUCT_ABC:
1144     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1145     break;
1146   default:
1147     break;
1148   }
1149   PetscFunctionReturn(0);
1150 }
1151 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1152 /* ------------------------ End MatProduct code ------------------------ */
1153 
1154 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1155  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1156  * routine, but can also be used to convert an assembled SeqAIJ matrix
1157  * into a SeqAIJMKL one. */
1158 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1159 {
1160   PetscErrorCode ierr;
1161   Mat            B = *newmat;
1162   Mat_SeqAIJMKL  *aijmkl;
1163   PetscBool      set;
1164   PetscBool      sametype;
1165 
1166   PetscFunctionBegin;
1167   if (reuse == MAT_INITIAL_MATRIX) {
1168     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1169   }
1170 
1171   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1172   if (sametype) PetscFunctionReturn(0);
1173 
1174   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1175   B->spptr = (void*) aijmkl;
1176 
1177   /* Set function pointers for methods that we inherit from AIJ but override.
1178    * We also parse some command line options below, since those determine some of the methods we point to. */
1179   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1180   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1181   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1182 
1183   aijmkl->sparse_optimized = PETSC_FALSE;
1184 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1185   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1186 #else
1187   aijmkl->no_SpMV2 = PETSC_TRUE;
1188 #endif
1189   aijmkl->eager_inspection = PETSC_FALSE;
1190 
1191   /* Parse command line options. */
1192   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1193   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1194   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1195   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1196 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1197   if(!aijmkl->no_SpMV2) {
1198     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1199     aijmkl->no_SpMV2 = PETSC_TRUE;
1200   }
1201 #endif
1202 
1203 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1204   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1205   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1206   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1207   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1208 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1209   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1210   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1211   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1212   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1213   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1214 #   if !defined(PETSC_USE_COMPLEX)
1215   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL;
1216 #   else
1217   B->ops->ptapnumeric             = NULL;
1218 #   endif
1219 # endif
1220 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1221 
1222 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1223   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1224    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1225    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1226    * versions in which the older interface has not been deprecated, we use the old interface. */
1227   if (aijmkl->no_SpMV2) {
1228     B->ops->mult             = MatMult_SeqAIJMKL;
1229     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1230     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1231     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1232   }
1233 #endif
1234 
1235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1236 
1237   if(!aijmkl->no_SpMV2) {
1238 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1239 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1240     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1241 #endif
1242 #endif
1243   }
1244 
1245   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1246   *newmat = B;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 /*@C
1251    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1252    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1253    routines from Intel MKL whenever possible.
1254    If the installed version of MKL supports the "SpMV2" sparse
1255    inspector-executor routines, then those are used by default.
1256    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1257    symmetric A) operations are currently supported.
1258    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1259 
1260    Collective
1261 
1262    Input Parameters:
1263 +  comm - MPI communicator, set to PETSC_COMM_SELF
1264 .  m - number of rows
1265 .  n - number of columns
1266 .  nz - number of nonzeros per row (same for all rows)
1267 -  nnz - array containing the number of nonzeros in the various rows
1268          (possibly different for each row) or NULL
1269 
1270    Output Parameter:
1271 .  A - the matrix
1272 
1273    Options Database Keys:
1274 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1275 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1276 
1277    Notes:
1278    If nnz is given then nz is ignored
1279 
1280    Level: intermediate
1281 
1282 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1283 @*/
1284 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1285 {
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1290   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1291   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1292   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1297 {
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1302   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305