xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 4e8afd12df985612b40e26aef947da2f566aee4e)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
51   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
52 
53   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
54   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
55   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
56 
57 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
58   if (!aijmkl->no_SpMV2) {
59 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
60     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
61 #endif
62   }
63 
64   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
65    * simply involves destroying the MKL sparse matrix handle and then freeing
66    * the spptr pointer. */
67   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
68 
69   if (aijmkl->sparse_optimized) {
70     sparse_status_t stat;
71     stat = mkl_sparse_destroy(aijmkl->csrA);
72     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
73   }
74 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
75   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
76 
77   /* Change the type of B to MATSEQAIJ. */
78   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
79 
80   *newmat = B;
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
85 {
86   PetscErrorCode ierr;
87   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
88 
89   PetscFunctionBegin;
90 
91   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
92    * spptr pointer. */
93   if (aijmkl) {
94     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
95 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
96     if (aijmkl->sparse_optimized) {
97       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
98       stat = mkl_sparse_destroy(aijmkl->csrA);
99       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
100     }
101 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
102     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
103   }
104 
105   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
106    * to destroy everything that remains. */
107   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
108   /* Note that I don't call MatSetType().  I believe this is because that
109    * is only to be called when *building* a matrix.  I could be wrong, but
110    * that is how things work for the SuperLU matrix class. */
111   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
116  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
117  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
118  * handle, creates a new one, and then calls mkl_sparse_optimize().
119  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
120  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
121  * an unoptimized matrix handle here. */
122 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
123 {
124 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
125   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
126    * does nothing. We make it callable anyway in this case because it cuts
127    * down on littering the code with #ifdefs. */
128   PetscFunctionBegin;
129   PetscFunctionReturn(0);
130 #else
131   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
132   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
133   PetscInt         m,n;
134   MatScalar        *aa;
135   PetscInt         *aj,*ai;
136   sparse_status_t  stat;
137   PetscErrorCode   ierr;
138 
139   PetscFunctionBegin;
140 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
141   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
142    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
143    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
144    * mkl_sparse_optimize() later. */
145   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
146 #endif
147 
148   if (aijmkl->sparse_optimized) {
149     /* Matrix has been previously assembled and optimized. Must destroy old
150      * matrix handle before running the optimization step again. */
151     stat = mkl_sparse_destroy(aijmkl->csrA);
152     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
153   }
154   aijmkl->sparse_optimized = PETSC_FALSE;
155 
156   /* Now perform the SpMV2 setup and matrix optimization. */
157   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
158   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
159   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
160   m = A->rmap->n;
161   n = A->cmap->n;
162   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
163   aa   = a->a;  /* Nonzero elements stored row-by-row. */
164   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
165   if ((a->nz!=0) & !(A->structure_only)) {
166     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
167      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
168     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
169     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
170     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
171     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
172     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
173     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
174     if (!aijmkl->no_SpMV2) {
175       stat = mkl_sparse_optimize(aijmkl->csrA);
176       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
177     }
178     aijmkl->sparse_optimized = PETSC_TRUE;
179     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
180   }
181 
182   PetscFunctionReturn(0);
183 #endif
184 }
185 
186 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
187  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
188  * matrix handle.
189  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
190  * there is no good alternative. */
191 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
192 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
193 {
194   PetscErrorCode      ierr;
195   sparse_status_t     stat;
196   sparse_index_base_t indexing;
197   PetscInt            nrows, ncols;
198   PetscInt            *aj,*ai,*dummy;
199   MatScalar           *aa;
200   Mat                 A;
201   Mat_SeqAIJMKL       *aijmkl;
202 
203   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
205   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206 
207   if (reuse == MAT_REUSE_MATRIX) {
208     ierr = MatDestroy(mat);CHKERRQ(ierr);
209   }
210   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
211   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
212   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
213   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
214    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
215    * they will be destroyed when the MKL handle is destroyed.
216    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
217   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
218 
219   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
220    * Now turn it into a MATSEQAIJMKL. */
221   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
222 
223   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
224   aijmkl->csrA = csrA;
225 
226   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
227    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
228    * and just need to be able to run the MKL optimization step. */
229   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
230   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
231   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
232   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
233   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
234   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
235   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
236   if (!aijmkl->no_SpMV2) {
237     stat = mkl_sparse_optimize(aijmkl->csrA);
238     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
239   }
240   aijmkl->sparse_optimized = PETSC_TRUE;
241   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
242 
243   *mat = A;
244   PetscFunctionReturn(0);
245 }
246 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
247 
248 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
249  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
250  * MatMatMultNumeric(). */
251 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
252 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
253 {
254   PetscInt            i;
255   PetscInt            nrows,ncols;
256   PetscInt            nz;
257   PetscInt            *ai,*aj,*dummy;
258   PetscScalar         *aa;
259   PetscErrorCode      ierr;
260   Mat_SeqAIJMKL       *aijmkl;
261   sparse_status_t     stat;
262   sparse_index_base_t indexing;
263 
264   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
265 
266   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
267   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
268   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
269 
270   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
271    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
272   for (i=0; i<nrows; i++) {
273     nz = ai[i+1] - ai[i];
274     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
275   }
276 
277   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
278   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
279 
280   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
281   /* We mark our matrix as having a valid, optimized MKL handle.
282    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
283   aijmkl->sparse_optimized = PETSC_TRUE;
284 
285   PetscFunctionReturn(0);
286 }
287 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
288 
289 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
290 {
291   PetscErrorCode ierr;
292   Mat_SeqAIJMKL  *aijmkl;
293   Mat_SeqAIJMKL  *aijmkl_dest;
294 
295   PetscFunctionBegin;
296   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
297   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
298   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
299   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
300   aijmkl_dest->sparse_optimized = PETSC_FALSE;
301   if (aijmkl->eager_inspection) {
302     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
303   }
304   PetscFunctionReturn(0);
305 }
306 
307 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
308 {
309   PetscErrorCode  ierr;
310   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
311   Mat_SeqAIJMKL   *aijmkl;
312 
313   PetscFunctionBegin;
314   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
315 
316   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
317    * extra information and some different methods, call the AssemblyEnd
318    * routine for a MATSEQAIJ.
319    * I'm not sure if this is the best way to do this, but it avoids
320    * a lot of code duplication. */
321   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
322   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
323 
324   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
325    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
326   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
327   if (aijmkl->eager_inspection) {
328     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
329   }
330 
331   PetscFunctionReturn(0);
332 }
333 
334 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
335 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
336 {
337   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
338   const PetscScalar *x;
339   PetscScalar       *y;
340   const MatScalar   *aa;
341   PetscErrorCode    ierr;
342   PetscInt          m=A->rmap->n;
343   PetscInt          n=A->cmap->n;
344   PetscScalar       alpha = 1.0;
345   PetscScalar       beta = 0.0;
346   const PetscInt    *aj,*ai;
347   char              matdescra[6];
348 
349 
350   /* Variables not in MatMult_SeqAIJ. */
351   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
352 
353   PetscFunctionBegin;
354   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
355   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
356   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
358   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
359   aa   = a->a;  /* Nonzero elements stored row-by-row. */
360   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
361 
362   /* Call MKL sparse BLAS routine to do the MatMult. */
363   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
364 
365   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
366   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
367   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 #endif
371 
372 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
373 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
374 {
375   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
376   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
377   const PetscScalar *x;
378   PetscScalar       *y;
379   PetscErrorCode    ierr;
380   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
381   PetscObjectState  state;
382 
383   PetscFunctionBegin;
384 
385   /* If there are no nonzero entries, zero yy and return immediately. */
386   if(!a->nz) {
387     PetscInt i;
388     PetscInt m=A->rmap->n;
389     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
390     for (i=0; i<m; i++) {
391       y[i] = 0.0;
392     }
393     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
394     PetscFunctionReturn(0);
395   }
396 
397   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
398   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399 
400   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
401    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
402    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
403   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
404   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
405     MatSeqAIJMKL_create_mkl_handle(A);
406   }
407 
408   /* Call MKL SpMV2 executor routine to do the MatMult. */
409   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
410   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
411 
412   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
413   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
414   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
418 
419 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
420 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
421 {
422   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
423   const PetscScalar *x;
424   PetscScalar       *y;
425   const MatScalar   *aa;
426   PetscErrorCode    ierr;
427   PetscInt          m=A->rmap->n;
428   PetscInt          n=A->cmap->n;
429   PetscScalar       alpha = 1.0;
430   PetscScalar       beta = 0.0;
431   const PetscInt    *aj,*ai;
432   char              matdescra[6];
433 
434   /* Variables not in MatMultTranspose_SeqAIJ. */
435   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
436 
437   PetscFunctionBegin;
438   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
439   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
440   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
441   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
442   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
443   aa   = a->a;  /* Nonzero elements stored row-by-row. */
444   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
445 
446   /* Call MKL sparse BLAS routine to do the MatMult. */
447   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
448 
449   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
451   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 #endif
455 
456 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
457 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
458 {
459   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
460   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
461   const PetscScalar *x;
462   PetscScalar       *y;
463   PetscErrorCode    ierr;
464   sparse_status_t   stat;
465   PetscObjectState  state;
466 
467   PetscFunctionBegin;
468 
469   /* If there are no nonzero entries, zero yy and return immediately. */
470   if(!a->nz) {
471     PetscInt i;
472     PetscInt n=A->cmap->n;
473     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
474     for (i=0; i<n; i++) {
475       y[i] = 0.0;
476     }
477     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
478     PetscFunctionReturn(0);
479   }
480 
481   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
482   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
483 
484   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
485    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
486    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
487   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
488   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
489     MatSeqAIJMKL_create_mkl_handle(A);
490   }
491 
492   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
493   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
494   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
495 
496   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
497   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
498   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
502 
503 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
504 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
505 {
506   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
507   const PetscScalar *x;
508   PetscScalar       *y,*z;
509   const MatScalar   *aa;
510   PetscErrorCode    ierr;
511   PetscInt          m=A->rmap->n;
512   PetscInt          n=A->cmap->n;
513   const PetscInt    *aj,*ai;
514   PetscInt          i;
515 
516   /* Variables not in MatMultAdd_SeqAIJ. */
517   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
518   PetscScalar       alpha = 1.0;
519   PetscScalar       beta;
520   char              matdescra[6];
521 
522   PetscFunctionBegin;
523   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
524   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
525 
526   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
527   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
528   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
529   aa   = a->a;  /* Nonzero elements stored row-by-row. */
530   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
531 
532   /* Call MKL sparse BLAS routine to do the MatMult. */
533   if (zz == yy) {
534     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
535     beta = 1.0;
536     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
537   } else {
538     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
539      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
540     beta = 0.0;
541     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
542     for (i=0; i<m; i++) {
543       z[i] += y[i];
544     }
545   }
546 
547   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
548   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
549   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 #endif
553 
554 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
555 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
556 {
557   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
558   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
559   const PetscScalar *x;
560   PetscScalar       *y,*z;
561   PetscErrorCode    ierr;
562   PetscInt          m=A->rmap->n;
563   PetscInt          i;
564 
565   /* Variables not in MatMultAdd_SeqAIJ. */
566   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
567   PetscObjectState  state;
568 
569   PetscFunctionBegin;
570 
571   /* If there are no nonzero entries, set zz = yy and return immediately. */
572   if(!a->nz) {
573     PetscInt i;
574     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
575     for (i=0; i<m; i++) {
576       z[i] = y[i];
577     }
578     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
579     PetscFunctionReturn(0);
580   }
581 
582   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
583   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
584 
585   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
586    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
587    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
588   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
589   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
590     MatSeqAIJMKL_create_mkl_handle(A);
591   }
592 
593   /* Call MKL sparse BLAS routine to do the MatMult. */
594   if (zz == yy) {
595     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
596      * with alpha and beta both set to 1.0. */
597     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
598     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
599   } else {
600     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
601      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
602     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
603     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
604     for (i=0; i<m; i++) {
605       z[i] += y[i];
606     }
607   }
608 
609   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
610   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
611   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
612   PetscFunctionReturn(0);
613 }
614 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
615 
616 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
617 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
620   const PetscScalar *x;
621   PetscScalar       *y,*z;
622   const MatScalar   *aa;
623   PetscErrorCode    ierr;
624   PetscInt          m=A->rmap->n;
625   PetscInt          n=A->cmap->n;
626   const PetscInt    *aj,*ai;
627   PetscInt          i;
628 
629   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
630   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
631   PetscScalar       alpha = 1.0;
632   PetscScalar       beta;
633   char              matdescra[6];
634 
635   PetscFunctionBegin;
636   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
637   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
638 
639   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
640   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
641   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
642   aa   = a->a;  /* Nonzero elements stored row-by-row. */
643   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
644 
645   /* Call MKL sparse BLAS routine to do the MatMult. */
646   if (zz == yy) {
647     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
648     beta = 1.0;
649     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
650   } else {
651     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
652      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
653     beta = 0.0;
654     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
655     for (i=0; i<n; i++) {
656       z[i] += y[i];
657     }
658   }
659 
660   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
661   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 #endif
666 
667 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
668 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
669 {
670   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
671   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
672   const PetscScalar *x;
673   PetscScalar       *y,*z;
674   PetscErrorCode    ierr;
675   PetscInt          n=A->cmap->n;
676   PetscInt          i;
677   PetscObjectState  state;
678 
679   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
680   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
681 
682   PetscFunctionBegin;
683 
684   /* If there are no nonzero entries, set zz = yy and return immediately. */
685   if(!a->nz) {
686     PetscInt i;
687     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
688     for (i=0; i<n; i++) {
689       z[i] = y[i];
690     }
691     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
692     PetscFunctionReturn(0);
693   }
694 
695   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
696   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
697 
698   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
699    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
700    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
701   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
702   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
703     MatSeqAIJMKL_create_mkl_handle(A);
704   }
705 
706   /* Call MKL sparse BLAS routine to do the MatMult. */
707   if (zz == yy) {
708     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
709      * with alpha and beta both set to 1.0. */
710     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
711     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
712   } else {
713     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
714      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
715     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
716     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
717     for (i=0; i<n; i++) {
718       z[i] += y[i];
719     }
720   }
721 
722   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
723   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
724   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
728 
729 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
730 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
731 {
732   Mat_SeqAIJMKL       *a, *b, *c;
733   sparse_matrix_t     csrA, csrB, csrC;
734   PetscErrorCode      ierr;
735   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
736   struct matrix_descr descr_type_gen;
737   PetscObjectState    state;
738 
739   PetscFunctionBegin;
740   a = (Mat_SeqAIJMKL*)A->spptr;
741   b = (Mat_SeqAIJMKL*)B->spptr;
742   c = (Mat_SeqAIJMKL*)C->spptr;
743   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
744   if (!a->sparse_optimized || a->state != state) {
745     MatSeqAIJMKL_create_mkl_handle(A);
746   }
747   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
748   if (!b->sparse_optimized || b->state != state) {
749     MatSeqAIJMKL_create_mkl_handle(B);
750   }
751   csrA = a->csrA;
752   csrB = b->csrA;
753   csrC = c->csrA;
754   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
755 
756   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
757                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
758                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
759 
760   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
761 
762   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
763   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
764 
765   PetscFunctionReturn(0);
766 }
767 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
768 
769 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
770 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
771 {
772   Mat_SeqAIJMKL       *a, *p, *c;
773   sparse_matrix_t     csrA, csrP, csrC;
774   PetscBool           set, flag;
775   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
776   struct matrix_descr descr_type_sym;
777   PetscObjectState    state;
778   PetscErrorCode      ierr;
779 
780   PetscFunctionBegin;
781   ierr = MatIsSymmetricKnown(A,&set,&flag);
782   if (!set || (set && !flag)) {
783     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
784     PetscFunctionReturn(0);
785   }
786 
787   a = (Mat_SeqAIJMKL*)A->spptr;
788   p = (Mat_SeqAIJMKL*)P->spptr;
789   c = (Mat_SeqAIJMKL*)C->spptr;
790   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
791   if (!a->sparse_optimized || a->state != state) {
792     MatSeqAIJMKL_create_mkl_handle(A);
793   }
794   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
795   if (!p->sparse_optimized || p->state != state) {
796     MatSeqAIJMKL_create_mkl_handle(P);
797   }
798   csrA = a->csrA;
799   csrP = p->csrA;
800   csrC = c->csrA;
801   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
802   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
803   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
804 
805   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
806   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
807   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
808 
809   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
810   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
811 
812   PetscFunctionReturn(0);
813 }
814 #endif
815 
816 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
817  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
818  * routine, but can also be used to convert an assembled SeqAIJ matrix
819  * into a SeqAIJMKL one. */
820 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
821 {
822   PetscErrorCode ierr;
823   Mat            B = *newmat;
824   Mat_SeqAIJMKL  *aijmkl;
825   PetscBool      set;
826   PetscBool      sametype;
827 
828   PetscFunctionBegin;
829   if (reuse == MAT_INITIAL_MATRIX) {
830     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
831   }
832 
833   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
834   if (sametype) PetscFunctionReturn(0);
835 
836   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
837   B->spptr = (void*) aijmkl;
838 
839   /* Set function pointers for methods that we inherit from AIJ but override.
840    * We also parse some command line options below, since those determine some of the methods we point to. */
841   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
842   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
843   B->ops->destroy          = MatDestroy_SeqAIJMKL;
844 
845   aijmkl->sparse_optimized = PETSC_FALSE;
846 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
847   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
848 #else
849   aijmkl->no_SpMV2 = PETSC_TRUE;
850 #endif
851   aijmkl->eager_inspection = PETSC_FALSE;
852 
853   /* Parse command line options. */
854   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
855   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
856   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
857   ierr = PetscOptionsEnd();CHKERRQ(ierr);
858 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
859   if(!aijmkl->no_SpMV2) {
860     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
861     aijmkl->no_SpMV2 = PETSC_TRUE;
862   }
863 #endif
864 
865 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
866   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
867   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
868   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
869   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
870 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
871   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
872 #   if !defined(PETSC_USE_COMPLEX)
873   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
874 #   endif
875 # endif
876 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
877 
878 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
879   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
880    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
881    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
882    * versions in which the older interface has not been deprecated, we use the old interface. */
883   if (aijmkl->no_SpMV2) {
884     B->ops->mult             = MatMult_SeqAIJMKL;
885     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
886     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
887     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
888   }
889 #endif
890 
891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
894 
895   if(!aijmkl->no_SpMV2) {
896 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
897 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
898     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
899 #endif
900 #endif
901   }
902 
903   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
904   *newmat = B;
905   PetscFunctionReturn(0);
906 }
907 
908 /*@C
909    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
910    This type inherits from AIJ and is largely identical, but uses sparse BLAS
911    routines from Intel MKL whenever possible.
912    If the installed version of MKL supports the "SpMV2" sparse
913    inspector-executor routines, then those are used by default.
914    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
915    symmetric A) operations are currently supported.
916    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
917 
918    Collective
919 
920    Input Parameters:
921 +  comm - MPI communicator, set to PETSC_COMM_SELF
922 .  m - number of rows
923 .  n - number of columns
924 .  nz - number of nonzeros per row (same for all rows)
925 -  nnz - array containing the number of nonzeros in the various rows
926          (possibly different for each row) or NULL
927 
928    Output Parameter:
929 .  A - the matrix
930 
931    Options Database Keys:
932 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
933 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
934 
935    Notes:
936    If nnz is given then nz is ignored
937 
938    Level: intermediate
939 
940 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
941 @*/
942 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
943 {
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   ierr = MatCreate(comm,A);CHKERRQ(ierr);
948   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
949   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
950   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
955 {
956   PetscErrorCode ierr;
957 
958   PetscFunctionBegin;
959   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
960   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963