xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61    * simply involves destroying the MKL sparse matrix handle and then freeing
62    * the spptr pointer. */
63   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
64 
65   if (aijmkl->sparse_optimized) {
66     sparse_status_t stat;
67     stat = mkl_sparse_destroy(aijmkl->csrA);
68     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize()");
69   }
70 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
71   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
72 
73   /* Change the type of B to MATSEQAIJ. */
74   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 
80 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
81 {
82   PetscErrorCode ierr;
83   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
84 
85   PetscFunctionBegin;
86 
87   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
88   if (aijmkl) {
89     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
90 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
91     if (aijmkl->sparse_optimized) {
92       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
93       stat = mkl_sparse_destroy(aijmkl->csrA);
94       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
95     }
96 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
97     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
98   }
99 
100   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
101    * to destroy everything that remains. */
102   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
103   /* Note that I don't call MatSetType().  I believe this is because that
104    * is only to be called when *building* a matrix.  I could be wrong, but
105    * that is how things work for the SuperLU matrix class. */
106   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
107   PetscFunctionReturn(0);
108 }
109 
110 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
111  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
112  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
113  * handle, creates a new one, and then calls mkl_sparse_optimize().
114  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
115  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
116  * an unoptimized matrix handle here. */
117 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
118 {
119 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
120   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
121    * does nothing. We make it callable anyway in this case because it cuts
122    * down on littering the code with #ifdefs. */
123   PetscFunctionBegin;
124   PetscFunctionReturn(0);
125 #else
126   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
127   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
128   PetscInt         m,n;
129   MatScalar        *aa;
130   PetscInt         *aj,*ai;
131   sparse_status_t  stat;
132   PetscErrorCode   ierr;
133 
134   PetscFunctionBegin;
135 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
136   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
137    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
138    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
139    * mkl_sparse_optimize() later. */
140   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
141 #endif
142 
143   if (aijmkl->sparse_optimized) {
144     /* Matrix has been previously assembled and optimized. Must destroy old
145      * matrix handle before running the optimization step again. */
146     stat = mkl_sparse_destroy(aijmkl->csrA);
147     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_destroy()");
148   }
149   aijmkl->sparse_optimized = PETSC_FALSE;
150 
151   /* Now perform the SpMV2 setup and matrix optimization. */
152   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
153   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
154   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
155   m = A->rmap->n;
156   n = A->cmap->n;
157   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
158   aa   = a->a;  /* Nonzero elements stored row-by-row. */
159   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
160   if (a->nz && aa && !A->structure_only) {
161     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
162      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
163     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
164     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle, mkl_sparse_x_create_csr()");
165     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
166     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
167     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
168     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
169     if (!aijmkl->no_SpMV2) {
170       stat = mkl_sparse_optimize(aijmkl->csrA);
171       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize()");
172     }
173     aijmkl->sparse_optimized = PETSC_TRUE;
174     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
175   } else {
176     aijmkl->csrA = PETSC_NULL;
177   }
178 
179   PetscFunctionReturn(0);
180 #endif
181 }
182 
183 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
184 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
185 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
186 {
187   PetscErrorCode      ierr;
188   sparse_status_t     stat;
189   sparse_index_base_t indexing;
190   PetscInt            m,n;
191   PetscInt            *aj,*ai,*dummy;
192   MatScalar           *aa;
193   Mat_SeqAIJMKL       *aijmkl;
194 
195   if (csrA) {
196     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
197     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
198     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
199     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
200   } else {
201     aj = ai = PETSC_NULL;
202     aa = PETSC_NULL;
203   }
204 
205   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
206   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
207   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
208    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
209    * they will be destroyed when the MKL handle is destroyed.
210    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
211   if (csrA) {
212     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
213   } else {
214     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
215     ierr = MatSetUp(A);CHKERRQ(ierr);
216     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
217     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
218   }
219 
220   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
221    * Now turn it into a MATSEQAIJMKL. */
222   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
223 
224   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
225   aijmkl->csrA = csrA;
226 
227   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
228    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
229    * and just need to be able to run the MKL optimization step. */
230   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
231   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
232   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
233   if (csrA) {
234     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
235     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_mv_hint()");
236     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
237     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_set_memory_hint()");
238   }
239   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
243 
244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
245  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
246  * MatMatMultNumeric(). */
247 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
248 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
249 {
250   PetscInt            i;
251   PetscInt            nrows,ncols;
252   PetscInt            nz;
253   PetscInt            *ai,*aj,*dummy;
254   PetscScalar         *aa;
255   PetscErrorCode      ierr;
256   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
257   sparse_status_t     stat;
258   sparse_index_base_t indexing;
259 
260   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
261   if (!aijmkl->csrA) PetscFunctionReturn(0);
262 
263   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
264   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
265   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
266 
267   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
268    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
269   for (i=0; i<nrows; i++) {
270     nz = ai[i+1] - ai[i];
271     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
272   }
273 
274   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
275   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
276 
277   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
278   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
279    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
280    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
281   aijmkl->sparse_optimized = PETSC_FALSE;
282   PetscFunctionReturn(0);
283 }
284 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
285 
286 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
287 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)
288 {
289   PetscInt            i,j,k;
290   PetscInt            nrows,ncols;
291   PetscInt            nz;
292   PetscInt            *ai,*aj,*dummy;
293   PetscScalar         *aa;
294   PetscErrorCode      ierr;
295   Mat_SeqAIJMKL       *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
296   sparse_status_t     stat;
297   sparse_index_base_t indexing;
298 
299   ierr = PetscViewerASCIIPrintf(viewer,"Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n");CHKERRQ(ierr);
300 
301   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
302   if (!aijmkl->csrA) {
303     ierr = PetscViewerASCIIPrintf(viewer,"MKL matrix handle is NULL\n");CHKERRQ(ierr);
304     PetscFunctionReturn(0);
305   }
306 
307   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
308   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
309   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
310 
311   k = 0;
312   for (i=0; i<nrows; i++) {
313     ierr = PetscViewerASCIIPrintf(viewer,"row %D: ",i);CHKERRQ(ierr);
314     nz = ai[i+1] - ai[i];
315     for (j=0; j<nz; j++) {
316       if (aa) {
317         ierr = PetscViewerASCIIPrintf(viewer,"(%D, %g)  ",aj[k],PetscRealPart(aa[k]));CHKERRQ(ierr);
318       } else {
319         ierr = PetscViewerASCIIPrintf(viewer,"(%D, NULL)",aj[k]);CHKERRQ(ierr);
320       }
321       k++;
322     }
323     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
324   }
325   PetscFunctionReturn(0);
326 }
327 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
328 
329 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
330 {
331   PetscErrorCode ierr;
332   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
333   Mat_SeqAIJMKL  *aijmkl_dest;
334 
335   PetscFunctionBegin;
336   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
337   aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
338   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
339   aijmkl_dest->sparse_optimized = PETSC_FALSE;
340   if (aijmkl->eager_inspection) {
341     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
342   }
343   PetscFunctionReturn(0);
344 }
345 
346 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
347 {
348   PetscErrorCode  ierr;
349   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
350   Mat_SeqAIJMKL   *aijmkl;
351 
352   PetscFunctionBegin;
353   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
354 
355   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
356    * extra information and some different methods, call the AssemblyEnd
357    * routine for a MATSEQAIJ.
358    * I'm not sure if this is the best way to do this, but it avoids
359    * a lot of code duplication. */
360   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
361   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
362 
363   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
364    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
365   aijmkl = (Mat_SeqAIJMKL*)A->spptr;
366   if (aijmkl->eager_inspection) {
367     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
368   }
369 
370   PetscFunctionReturn(0);
371 }
372 
373 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
374 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
375 {
376   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
377   const PetscScalar *x;
378   PetscScalar       *y;
379   const MatScalar   *aa;
380   PetscErrorCode    ierr;
381   PetscInt          m = A->rmap->n;
382   PetscInt          n = A->cmap->n;
383   PetscScalar       alpha = 1.0;
384   PetscScalar       beta = 0.0;
385   const PetscInt    *aj,*ai;
386   char              matdescra[6];
387 
388 
389   /* Variables not in MatMult_SeqAIJ. */
390   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
391 
392   PetscFunctionBegin;
393   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
394   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
395   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
396   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
397   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
398   aa   = a->a;  /* Nonzero elements stored row-by-row. */
399   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
400 
401   /* Call MKL sparse BLAS routine to do the MatMult. */
402   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
403 
404   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
405   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
406   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 #endif
410 
411 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
412 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
413 {
414   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
415   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
416   const PetscScalar *x;
417   PetscScalar       *y;
418   PetscErrorCode    ierr;
419   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
420   PetscObjectState  state;
421 
422   PetscFunctionBegin;
423 
424   /* If there are no nonzero entries, zero yy and return immediately. */
425   if (!a->nz) {
426     PetscInt i;
427     PetscInt m=A->rmap->n;
428     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
429     for (i=0; i<m; i++) {
430       y[i] = 0.0;
431     }
432     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
433     PetscFunctionReturn(0);
434   }
435 
436   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
437   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
438 
439   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
440    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
441    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
442   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
443   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
444     MatSeqAIJMKL_create_mkl_handle(A);
445   }
446 
447   /* Call MKL SpMV2 executor routine to do the MatMult. */
448   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
449   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
450 
451   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
452   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
453   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
457 
458 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
459 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
460 {
461   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
462   const PetscScalar *x;
463   PetscScalar       *y;
464   const MatScalar   *aa;
465   PetscErrorCode    ierr;
466   PetscInt          m = A->rmap->n;
467   PetscInt          n = A->cmap->n;
468   PetscScalar       alpha = 1.0;
469   PetscScalar       beta = 0.0;
470   const PetscInt    *aj,*ai;
471   char              matdescra[6];
472 
473   /* Variables not in MatMultTranspose_SeqAIJ. */
474   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
475 
476   PetscFunctionBegin;
477   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
478   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
479   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
481   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
482   aa   = a->a;  /* Nonzero elements stored row-by-row. */
483   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
484 
485   /* Call MKL sparse BLAS routine to do the MatMult. */
486   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
487 
488   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
489   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
490   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 #endif
494 
495 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
496 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
497 {
498   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
499   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
500   const PetscScalar *x;
501   PetscScalar       *y;
502   PetscErrorCode    ierr;
503   sparse_status_t   stat;
504   PetscObjectState  state;
505 
506   PetscFunctionBegin;
507 
508   /* If there are no nonzero entries, zero yy and return immediately. */
509   if (!a->nz) {
510     PetscInt i;
511     PetscInt n=A->cmap->n;
512     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
513     for (i=0; i<n; i++) {
514       y[i] = 0.0;
515     }
516     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
517     PetscFunctionReturn(0);
518   }
519 
520   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
521   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
522 
523   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
524    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
525    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
526   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
527   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
528     MatSeqAIJMKL_create_mkl_handle(A);
529   }
530 
531   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
532   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
533   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
534 
535   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
536   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
537   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
541 
542 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
543 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
544 {
545   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
546   const PetscScalar *x;
547   PetscScalar       *y,*z;
548   const MatScalar   *aa;
549   PetscErrorCode    ierr;
550   PetscInt          m = A->rmap->n;
551   PetscInt          n = A->cmap->n;
552   const PetscInt    *aj,*ai;
553   PetscInt          i;
554 
555   /* Variables not in MatMultAdd_SeqAIJ. */
556   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
557   PetscScalar       alpha = 1.0;
558   PetscScalar       beta;
559   char              matdescra[6];
560 
561   PetscFunctionBegin;
562   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
563   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
564 
565   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
566   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
567   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
568   aa   = a->a;  /* Nonzero elements stored row-by-row. */
569   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
570 
571   /* Call MKL sparse BLAS routine to do the MatMult. */
572   if (zz == yy) {
573     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
574     beta = 1.0;
575     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
576   } else {
577     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
578      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
579     beta = 0.0;
580     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
581     for (i=0; i<m; i++) {
582       z[i] += y[i];
583     }
584   }
585 
586   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
587   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
588   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 #endif
592 
593 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
594 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
595 {
596   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
597   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
598   const PetscScalar *x;
599   PetscScalar       *y,*z;
600   PetscErrorCode    ierr;
601   PetscInt          m = A->rmap->n;
602   PetscInt          i;
603 
604   /* Variables not in MatMultAdd_SeqAIJ. */
605   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
606   PetscObjectState  state;
607 
608   PetscFunctionBegin;
609 
610   /* If there are no nonzero entries, set zz = yy and return immediately. */
611   if (!a->nz) {
612     PetscInt i;
613     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
614     for (i=0; i<m; i++) {
615       z[i] = y[i];
616     }
617     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
618     PetscFunctionReturn(0);
619   }
620 
621   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
622   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
623 
624   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
625    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
626    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
627   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
628   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
629     MatSeqAIJMKL_create_mkl_handle(A);
630   }
631 
632   /* Call MKL sparse BLAS routine to do the MatMult. */
633   if (zz == yy) {
634     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
635      * with alpha and beta both set to 1.0. */
636     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
637     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
638   } else {
639     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
640      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
641     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
642     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
643     for (i=0; i<m; i++) {
644       z[i] += y[i];
645     }
646   }
647 
648   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
649   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
650   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
654 
655 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
656 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
657 {
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
659   const PetscScalar *x;
660   PetscScalar       *y,*z;
661   const MatScalar   *aa;
662   PetscErrorCode    ierr;
663   PetscInt          m = A->rmap->n;
664   PetscInt          n = A->cmap->n;
665   const PetscInt    *aj,*ai;
666   PetscInt          i;
667 
668   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
669   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
670   PetscScalar       alpha = 1.0;
671   PetscScalar       beta;
672   char              matdescra[6];
673 
674   PetscFunctionBegin;
675   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
676   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
677 
678   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
679   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
680   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
681   aa   = a->a;  /* Nonzero elements stored row-by-row. */
682   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
683 
684   /* Call MKL sparse BLAS routine to do the MatMult. */
685   if (zz == yy) {
686     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
687     beta = 1.0;
688     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
689   } else {
690     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
691      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
692     beta = 0.0;
693     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
694     for (i=0; i<n; i++) {
695       z[i] += y[i];
696     }
697   }
698 
699   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
700   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
701   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
702   PetscFunctionReturn(0);
703 }
704 #endif
705 
706 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
707 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
708 {
709   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
710   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
711   const PetscScalar *x;
712   PetscScalar       *y,*z;
713   PetscErrorCode    ierr;
714   PetscInt          n = A->cmap->n;
715   PetscInt          i;
716   PetscObjectState  state;
717 
718   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
719   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
720 
721   PetscFunctionBegin;
722 
723   /* If there are no nonzero entries, set zz = yy and return immediately. */
724   if (!a->nz) {
725     PetscInt i;
726     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
727     for (i=0; i<n; i++) {
728       z[i] = y[i];
729     }
730     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
731     PetscFunctionReturn(0);
732   }
733 
734   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
735   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
736 
737   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
738    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
739    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
740   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
741   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
742     MatSeqAIJMKL_create_mkl_handle(A);
743   }
744 
745   /* Call MKL sparse BLAS routine to do the MatMult. */
746   if (zz == yy) {
747     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
748      * with alpha and beta both set to 1.0. */
749     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
750     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
751   } else {
752     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
753      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
754     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
755     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: failure in mkl_sparse_x_mv()");
756     for (i=0; i<n; i++) {
757       z[i] += y[i];
758     }
759   }
760 
761   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
762   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
763   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
767 
768 /* -------------------------- MatProduct code -------------------------- */
769 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
770 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
771 {
772   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr;
773   sparse_matrix_t     csrA,csrB,csrC;
774   PetscInt            nrows,ncols;
775   PetscErrorCode      ierr;
776   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
777   struct matrix_descr descr_type_gen;
778   PetscObjectState    state;
779 
780   PetscFunctionBegin;
781   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
782    * not handle sparse matrices with zero rows or columns. */
783   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
784   else nrows = A->cmap->N;
785   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
786   else ncols = B->rmap->N;
787 
788   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
789   if (!a->sparse_optimized || a->state != state) {
790     MatSeqAIJMKL_create_mkl_handle(A);
791   }
792   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
793   if (!b->sparse_optimized || b->state != state) {
794     MatSeqAIJMKL_create_mkl_handle(B);
795   }
796   csrA = a->csrA;
797   csrB = b->csrA;
798   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
799 
800   if (csrA && csrB) {
801     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
802     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
803   } else {
804     csrC = PETSC_NULL;
805   }
806 
807   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
808 
809   PetscFunctionReturn(0);
810 }
811 
812 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
813 {
814   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*b = (Mat_SeqAIJMKL*)B->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
815   sparse_matrix_t     csrA, csrB, csrC;
816   PetscErrorCode      ierr;
817   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
818   struct matrix_descr descr_type_gen;
819   PetscObjectState    state;
820 
821   PetscFunctionBegin;
822   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
823   if (!a->sparse_optimized || a->state != state) {
824     MatSeqAIJMKL_create_mkl_handle(A);
825   }
826   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
827   if (!b->sparse_optimized || b->state != state) {
828     MatSeqAIJMKL_create_mkl_handle(B);
829   }
830   csrA = a->csrA;
831   csrB = b->csrA;
832   csrC = c->csrA;
833   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
834 
835   if (csrA && csrB) {
836     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
837     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply in mkl_sparse_sp2m()");
838   } else {
839     csrC = PETSC_NULL;
840   }
841 
842   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
843   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
844 
845   PetscFunctionReturn(0);
846 }
847 
848 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
849 {
850   PetscErrorCode ierr;
851 
852   PetscFunctionBegin;
853   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
858 {
859   PetscErrorCode ierr;
860 
861   PetscFunctionBegin;
862   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
867 {
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
876 {
877   PetscErrorCode ierr;
878 
879   PetscFunctionBegin;
880   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
885 {
886   PetscErrorCode ierr;
887 
888   PetscFunctionBegin;
889   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
894 {
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
903 {
904   PetscErrorCode ierr;
905   Mat_Product    *product = C->product;
906   Mat            A = product->A,B = product->B;
907 
908   PetscFunctionBegin;
909   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
914 {
915   PetscErrorCode ierr;
916   Mat_Product    *product = C->product;
917   Mat            A = product->A,B = product->B;
918   PetscReal      fill = product->fill;
919 
920   PetscFunctionBegin;
921   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
922   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
923   PetscFunctionReturn(0);
924 }
925 
926 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)
927 {
928   Mat                 Ct;
929   Vec                 zeros;
930   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr,*c = (Mat_SeqAIJMKL*)C->spptr;
931   sparse_matrix_t     csrA, csrP, csrC;
932   PetscBool           set, flag;
933   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
934   struct matrix_descr descr_type_sym;
935   PetscObjectState    state;
936   PetscErrorCode      ierr;
937 
938   PetscFunctionBegin;
939   ierr = MatIsSymmetricKnown(A,&set,&flag);
940   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
941 
942   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
943   if (!a->sparse_optimized || a->state != state) {
944     MatSeqAIJMKL_create_mkl_handle(A);
945   }
946   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
947   if (!p->sparse_optimized || p->state != state) {
948     MatSeqAIJMKL_create_mkl_handle(P);
949   }
950   csrA = a->csrA;
951   csrP = p->csrA;
952   csrC = c->csrA;
953   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
954   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
955   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
956 
957   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
958   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
959   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr()");
960 
961   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
962    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
963    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
964    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
965    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
966    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
967    * the full matrix. */
968   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
969   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
970   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
971   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
972   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
973   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
974   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
975   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
976   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
977   ierr = MatProductSetType(C,MATPRODUCT_PtAP);CHKERRQ(ierr);
978   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
979   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
980   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
985 {
986   Mat_Product         *product = C->product;
987   Mat                 A = product->A,P = product->B;
988   Mat_SeqAIJMKL       *a = (Mat_SeqAIJMKL*)A->spptr,*p = (Mat_SeqAIJMKL*)P->spptr;
989   sparse_matrix_t     csrA,csrP,csrC;
990   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
991   struct matrix_descr descr_type_sym;
992   PetscObjectState    state;
993   PetscErrorCode      ierr;
994 
995   PetscFunctionBegin;
996   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
997   if (!a->sparse_optimized || a->state != state) {
998     MatSeqAIJMKL_create_mkl_handle(A);
999   }
1000   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
1001   if (!p->sparse_optimized || p->state != state) {
1002     MatSeqAIJMKL_create_mkl_handle(P);
1003   }
1004   csrA = a->csrA;
1005   csrP = p->csrA;
1006   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
1007   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
1008   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
1009 
1010   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
1011   if (csrP && csrA) {
1012     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
1013     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr()");
1014   } else {
1015     csrC = PETSC_NULL;
1016   }
1017 
1018   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1019    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1020    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
1021    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
1022    * is guaranteed. */
1023   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1024 
1025   C->ops->productnumeric = MatProductNumeric_PtAP;
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1030 {
1031   PetscFunctionBegin;
1032   C->ops->productsymbolic = MatProductSymbolic_AB;
1033   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1038 {
1039   PetscFunctionBegin;
1040   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1045 {
1046   PetscFunctionBegin;
1047   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1048   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1053 {
1054   PetscErrorCode ierr;
1055   Mat_Product    *product = C->product;
1056   Mat            A = product->A;
1057   PetscBool      set, flag;
1058 
1059   PetscFunctionBegin;
1060 #if defined(PETSC_USE_COMPLEX)
1061   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
1062    * We do this in several other locations in this file. This works for the time being, but these
1063    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1064    * TODO: Add proper MATSEQAIJMKL implementations */
1065   C->ops->productsymbolic = NULL;
1066 #else
1067   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1068   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1069   if (set && flag) {
1070     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1071     PetscFunctionReturn(0);
1072   } else {
1073     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1074   }
1075   /* Note that we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
1076    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1077 #endif
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1082 {
1083   PetscFunctionBegin;
1084   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1089 {
1090   PetscFunctionBegin;
1091   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1096 {
1097   PetscErrorCode ierr;
1098   Mat_Product    *product = C->product;
1099 
1100   PetscFunctionBegin;
1101   switch (product->type) {
1102   case MATPRODUCT_AB:
1103     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1104     break;
1105   case MATPRODUCT_AtB:
1106     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1107     break;
1108   case MATPRODUCT_ABt:
1109     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1110     break;
1111   case MATPRODUCT_PtAP:
1112     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1113     break;
1114   case MATPRODUCT_RARt:
1115     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1116     break;
1117   case MATPRODUCT_ABC:
1118     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1119     break;
1120   default:
1121     break;
1122   }
1123   PetscFunctionReturn(0);
1124 }
1125 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1126 /* ------------------------ End MatProduct code ------------------------ */
1127 
1128 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1129  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1130  * routine, but can also be used to convert an assembled SeqAIJ matrix
1131  * into a SeqAIJMKL one. */
1132 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1133 {
1134   PetscErrorCode ierr;
1135   Mat            B = *newmat;
1136   Mat_SeqAIJMKL  *aijmkl;
1137   PetscBool      set;
1138   PetscBool      sametype;
1139 
1140   PetscFunctionBegin;
1141   if (reuse == MAT_INITIAL_MATRIX) {
1142     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1143   }
1144 
1145   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1146   if (sametype) PetscFunctionReturn(0);
1147 
1148   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1149   B->spptr = (void*) aijmkl;
1150 
1151   /* Set function pointers for methods that we inherit from AIJ but override.
1152    * We also parse some command line options below, since those determine some of the methods we point to. */
1153   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1154   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1155   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1156 
1157   aijmkl->sparse_optimized = PETSC_FALSE;
1158 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1159   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1160 #else
1161   aijmkl->no_SpMV2 = PETSC_TRUE;
1162 #endif
1163   aijmkl->eager_inspection = PETSC_FALSE;
1164 
1165   /* Parse command line options. */
1166   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1167   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1168   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1169   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1170 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1171   if (!aijmkl->no_SpMV2) {
1172     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1173     aijmkl->no_SpMV2 = PETSC_TRUE;
1174   }
1175 #endif
1176 
1177 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1178   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1179   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1180   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1181   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1182 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1183   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1184   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1185   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1186   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1187   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1188 #   if !defined(PETSC_USE_COMPLEX)
1189   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1190 #   else
1191   B->ops->ptapnumeric             = NULL;
1192 #   endif
1193 # endif
1194 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1195 
1196 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1197   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1198    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1199    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1200    * versions in which the older interface has not been deprecated, we use the old interface. */
1201   if (aijmkl->no_SpMV2) {
1202     B->ops->mult             = MatMult_SeqAIJMKL;
1203     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1204     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1205     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1206   }
1207 #endif
1208 
1209   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1210 
1211   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1212   *newmat = B;
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*@C
1217    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1218    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1219    routines from Intel MKL whenever possible.
1220    If the installed version of MKL supports the "SpMV2" sparse
1221    inspector-executor routines, then those are used by default.
1222    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1223    symmetric A) operations are currently supported.
1224    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1225 
1226    Collective
1227 
1228    Input Parameters:
1229 +  comm - MPI communicator, set to PETSC_COMM_SELF
1230 .  m - number of rows
1231 .  n - number of columns
1232 .  nz - number of nonzeros per row (same for all rows)
1233 -  nnz - array containing the number of nonzeros in the various rows
1234          (possibly different for each row) or NULL
1235 
1236    Output Parameter:
1237 .  A - the matrix
1238 
1239    Options Database Keys:
1240 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1241 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1242 
1243    Notes:
1244    If nnz is given then nz is ignored
1245 
1246    Level: intermediate
1247 
1248 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1249 @*/
1250 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1251 {
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1256   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1257   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1258   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1263 {
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1268   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
1271