xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 45fbe4786d0bd596393157c5163c80feffa87361)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->scale            = MatScale_SeqAIJ;
52   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
53   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
54   B->ops->axpy             = MatAXPY_SeqAIJ;
55 
56   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   if(!aijmkl->no_SpMV2) {
61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
70   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
71 
72   if (aijmkl->sparse_optimized) {
73     sparse_status_t stat;
74     stat = mkl_sparse_destroy(aijmkl->csrA);
75     if (stat != SPARSE_STATUS_SUCCESS) {
76       PetscFunctionReturn(PETSC_ERR_LIB);
77     }
78   }
79 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
80   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
81 
82   /* Change the type of B to MATSEQAIJ. */
83   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
84 
85   *newmat = B;
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
90 {
91   PetscErrorCode ierr;
92   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
93 
94   PetscFunctionBegin;
95 
96   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
97    * spptr pointer. */
98   if (aijmkl) {
99     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
100 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
101     if (aijmkl->sparse_optimized) {
102       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
103       stat = mkl_sparse_destroy(aijmkl->csrA);
104       if (stat != SPARSE_STATUS_SUCCESS) {
105         PetscFunctionReturn(PETSC_ERR_LIB);
106       }
107     }
108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
109     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
110   }
111 
112   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
113    * to destroy everything that remains. */
114   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
115   /* Note that I don't call MatSetType().  I believe this is because that
116    * is only to be called when *building* a matrix.  I could be wrong, but
117    * that is how things work for the SuperLU matrix class. */
118   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
123  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
124  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
125  * handle, creates a new one, and then calls mkl_sparse_optimize().
126  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
127  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
128  * an unoptimized matrix handle here. */
129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
130 {
131 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
132   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
133    * does nothing. We make it callable anyway in this case because it cuts
134    * down on littering the code with #ifdefs. */
135   PetscFunctionBegin;
136   PetscFunctionReturn(0);
137 #else
138   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
139   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
140   PetscInt        m,n;
141   MatScalar       *aa;
142   PetscInt        *aj,*ai;
143   sparse_status_t stat;
144 
145   PetscFunctionBegin;
146   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
147 
148   if (aijmkl->sparse_optimized) {
149     /* Matrix has been previously assembled and optimized. Must destroy old
150      * matrix handle before running the optimization step again. */
151     stat = mkl_sparse_destroy(aijmkl->csrA);
152     if (stat != SPARSE_STATUS_SUCCESS) {
153       PetscFunctionReturn(PETSC_ERR_LIB);
154     }
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if ((a->nz!=0) & !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
173     stat = mkl_sparse_optimize(aijmkl->csrA);
174     if (stat != SPARSE_STATUS_SUCCESS) {
175       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
176       PetscFunctionReturn(PETSC_ERR_LIB);
177     }
178     aijmkl->sparse_optimized = PETSC_TRUE;
179   }
180 
181   PetscFunctionReturn(0);
182 #endif
183 }
184 
185 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
186  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
187  * matrix handle. */
188 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
189 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,Mat *mat)
190 {
191   PetscErrorCode ierr;
192   sparse_status_t stat;
193   sparse_index_base_t indexing;
194   PetscInt nrows, ncols;
195   PetscInt *aj,*ai,*dummy;
196   MatScalar *aa;
197   Mat A;
198   Mat_SeqAIJMKL *aijmkl;
199 
200   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
201   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
202   if (stat != SPARSE_STATUS_SUCCESS) {
203     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
204     PetscFunctionReturn(PETSC_ERR_LIB);
205   }
206   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
207   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
208   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
209   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
210 
211   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
212    * Now turn it into a MATSEQAIJMKL. */
213   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
214   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
215   aijmkl->csrA = csrA;
216   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
217    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
218    * and just need to be able to run the MKL optimization step. */
219   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
220   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
221   stat = mkl_sparse_optimize(aijmkl->csrA);
222   if (stat != SPARSE_STATUS_SUCCESS) {
223     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
224     PetscFunctionReturn(PETSC_ERR_LIB);
225   }
226   aijmkl->sparse_optimized = PETSC_TRUE;
227 
228   *mat = A;
229   PetscFunctionReturn(0);
230 }
231 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
232 
233 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
234 {
235   PetscErrorCode ierr;
236   Mat_SeqAIJMKL *aijmkl;
237   Mat_SeqAIJMKL *aijmkl_dest;
238 
239   PetscFunctionBegin;
240   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
241   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
242   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
243   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
244   aijmkl_dest->sparse_optimized = PETSC_FALSE;
245   if (aijmkl->eager_inspection) {
246     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
252 {
253   PetscErrorCode  ierr;
254   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
255   Mat_SeqAIJMKL *aijmkl;
256 
257   PetscFunctionBegin;
258   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
259 
260   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
261    * extra information and some different methods, call the AssemblyEnd
262    * routine for a MATSEQAIJ.
263    * I'm not sure if this is the best way to do this, but it avoids
264    * a lot of code duplication. */
265   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
266   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
267 
268   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
269    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
270   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
271   if (aijmkl->eager_inspection) {
272     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
273   }
274 
275   PetscFunctionReturn(0);
276 }
277 
278 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
279 {
280   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
281   const PetscScalar *x;
282   PetscScalar       *y;
283   const MatScalar   *aa;
284   PetscErrorCode    ierr;
285   PetscInt          m=A->rmap->n;
286   PetscInt          n=A->cmap->n;
287   PetscScalar       alpha = 1.0;
288   PetscScalar       beta = 0.0;
289   const PetscInt    *aj,*ai;
290   char              matdescra[6];
291 
292 
293   /* Variables not in MatMult_SeqAIJ. */
294   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
295 
296   PetscFunctionBegin;
297   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
298   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
299   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
300   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
301   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
302   aa   = a->a;  /* Nonzero elements stored row-by-row. */
303   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
304 
305   /* Call MKL sparse BLAS routine to do the MatMult. */
306   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
307 
308   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
309   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
310   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
315 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
316 {
317   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
318   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
319   const PetscScalar *x;
320   PetscScalar       *y;
321   PetscErrorCode    ierr;
322   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
323 
324   PetscFunctionBegin;
325 
326   /* If there are no nonzero entries, zero yy and return immediately. */
327   if(!a->nz) {
328     PetscInt i;
329     PetscInt m=A->rmap->n;
330     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
331     for (i=0; i<m; i++) {
332       y[i] = 0.0;
333     }
334     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
335     PetscFunctionReturn(0);
336   }
337 
338   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
339   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
340 
341   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
342    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
343    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
344   if (!aijmkl->sparse_optimized) {
345     MatSeqAIJMKL_create_mkl_handle(A);
346   }
347 
348   /* Call MKL SpMV2 executor routine to do the MatMult. */
349   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
350 
351   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
352   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
353   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
354   if (stat != SPARSE_STATUS_SUCCESS) {
355     PetscFunctionReturn(PETSC_ERR_LIB);
356   }
357   PetscFunctionReturn(0);
358 }
359 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
360 
361 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
362 {
363   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
364   const PetscScalar *x;
365   PetscScalar       *y;
366   const MatScalar   *aa;
367   PetscErrorCode    ierr;
368   PetscInt          m=A->rmap->n;
369   PetscInt          n=A->cmap->n;
370   PetscScalar       alpha = 1.0;
371   PetscScalar       beta = 0.0;
372   const PetscInt    *aj,*ai;
373   char              matdescra[6];
374 
375   /* Variables not in MatMultTranspose_SeqAIJ. */
376   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
377 
378   PetscFunctionBegin;
379   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
380   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
381   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
382   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
383   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
384   aa   = a->a;  /* Nonzero elements stored row-by-row. */
385   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
386 
387   /* Call MKL sparse BLAS routine to do the MatMult. */
388   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
389 
390   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
391   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
392   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
397 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
398 {
399   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
400   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
401   const PetscScalar *x;
402   PetscScalar       *y;
403   PetscErrorCode    ierr;
404   sparse_status_t   stat;
405 
406   PetscFunctionBegin;
407 
408   /* If there are no nonzero entries, zero yy and return immediately. */
409   if(!a->nz) {
410     PetscInt i;
411     PetscInt n=A->cmap->n;
412     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
413     for (i=0; i<n; i++) {
414       y[i] = 0.0;
415     }
416     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
417     PetscFunctionReturn(0);
418   }
419 
420   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
421   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
422 
423   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
424    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
425    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
426   if (!aijmkl->sparse_optimized) {
427     MatSeqAIJMKL_create_mkl_handle(A);
428   }
429 
430   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
431   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
432 
433   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
434   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
435   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
436   if (stat != SPARSE_STATUS_SUCCESS) {
437     PetscFunctionReturn(PETSC_ERR_LIB);
438   }
439   PetscFunctionReturn(0);
440 }
441 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
442 
443 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
444 {
445   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
446   const PetscScalar *x;
447   PetscScalar       *y,*z;
448   const MatScalar   *aa;
449   PetscErrorCode    ierr;
450   PetscInt          m=A->rmap->n;
451   PetscInt          n=A->cmap->n;
452   const PetscInt    *aj,*ai;
453   PetscInt          i;
454 
455   /* Variables not in MatMultAdd_SeqAIJ. */
456   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
457   PetscScalar       alpha = 1.0;
458   PetscScalar       beta;
459   char              matdescra[6];
460 
461   PetscFunctionBegin;
462   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
463   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
464 
465   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
466   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
467   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
468   aa   = a->a;  /* Nonzero elements stored row-by-row. */
469   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
470 
471   /* Call MKL sparse BLAS routine to do the MatMult. */
472   if (zz == yy) {
473     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
474     beta = 1.0;
475     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
476   } else {
477     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
478      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
479     beta = 0.0;
480     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
481     for (i=0; i<m; i++) {
482       z[i] += y[i];
483     }
484   }
485 
486   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
487   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
493 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
494 {
495   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
496   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
497   const PetscScalar *x;
498   PetscScalar       *y,*z;
499   PetscErrorCode    ierr;
500   PetscInt          m=A->rmap->n;
501   PetscInt          i;
502 
503   /* Variables not in MatMultAdd_SeqAIJ. */
504   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
505 
506   PetscFunctionBegin;
507 
508   /* If there are no nonzero entries, set zz = yy and return immediately. */
509   if(!a->nz) {
510     PetscInt i;
511     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
512     for (i=0; i<m; i++) {
513       z[i] = y[i];
514     }
515     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
516     PetscFunctionReturn(0);
517   }
518 
519   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
520   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
521 
522   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
523    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
524    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
525   if (!aijmkl->sparse_optimized) {
526     MatSeqAIJMKL_create_mkl_handle(A);
527   }
528 
529   /* Call MKL sparse BLAS routine to do the MatMult. */
530   if (zz == yy) {
531     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
532      * with alpha and beta both set to 1.0. */
533     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
534   } else {
535     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
536      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
537     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
538     for (i=0; i<m; i++) {
539       z[i] += y[i];
540     }
541   }
542 
543   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
544   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
545   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
546   if (stat != SPARSE_STATUS_SUCCESS) {
547     PetscFunctionReturn(PETSC_ERR_LIB);
548   }
549   PetscFunctionReturn(0);
550 }
551 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
552 
553 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
554 {
555   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
556   const PetscScalar *x;
557   PetscScalar       *y,*z;
558   const MatScalar   *aa;
559   PetscErrorCode    ierr;
560   PetscInt          m=A->rmap->n;
561   PetscInt          n=A->cmap->n;
562   const PetscInt    *aj,*ai;
563   PetscInt          i;
564 
565   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
566   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
567   PetscScalar       alpha = 1.0;
568   PetscScalar       beta;
569   char              matdescra[6];
570 
571   PetscFunctionBegin;
572   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
573   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
574 
575   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
576   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
577   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
578   aa   = a->a;  /* Nonzero elements stored row-by-row. */
579   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
580 
581   /* Call MKL sparse BLAS routine to do the MatMult. */
582   if (zz == yy) {
583     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
584     beta = 1.0;
585     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
586   } else {
587     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
588      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
589     beta = 0.0;
590     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
591     for (i=0; i<n; i++) {
592       z[i] += y[i];
593     }
594   }
595 
596   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
597   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
598   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
603 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
604 {
605   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
606   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
607   const PetscScalar *x;
608   PetscScalar       *y,*z;
609   PetscErrorCode    ierr;
610   PetscInt          n=A->cmap->n;
611   PetscInt          i;
612 
613   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
614   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
615 
616   PetscFunctionBegin;
617 
618   /* If there are no nonzero entries, set zz = yy and return immediately. */
619   if(!a->nz) {
620     PetscInt i;
621     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
622     for (i=0; i<n; i++) {
623       z[i] = y[i];
624     }
625     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
626     PetscFunctionReturn(0);
627   }
628 
629   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
630   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
631 
632   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
633    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
634    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
635   if (!aijmkl->sparse_optimized) {
636     MatSeqAIJMKL_create_mkl_handle(A);
637   }
638 
639   /* Call MKL sparse BLAS routine to do the MatMult. */
640   if (zz == yy) {
641     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
642      * with alpha and beta both set to 1.0. */
643     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
644   } else {
645     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
646      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
647     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
648     for (i=0; i<n; i++) {
649       z[i] += y[i];
650     }
651   }
652 
653   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
654   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
655   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
656   if (stat != SPARSE_STATUS_SUCCESS) {
657     PetscFunctionReturn(PETSC_ERR_LIB);
658   }
659   PetscFunctionReturn(0);
660 }
661 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
662 
663 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
664 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
665 {
666   Mat_SeqAIJMKL *a, *b;
667   sparse_matrix_t csrA, csrB, csrC;
668   PetscErrorCode ierr;
669   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
670 
671   PetscFunctionBegin;
672   a = (Mat_SeqAIJMKL*)A->spptr;
673   b = (Mat_SeqAIJMKL*)B->spptr;
674   if (!a->sparse_optimized) {
675     MatSeqAIJMKL_create_mkl_handle(A);
676   }
677   if (!b->sparse_optimized) {
678     MatSeqAIJMKL_create_mkl_handle(B);
679   }
680   csrA = a->csrA;
681   csrB = b->csrA;
682 
683   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
684   if (stat != SPARSE_STATUS_SUCCESS) {
685     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
686     PetscFunctionReturn(PETSC_ERR_LIB);
687   }
688 
689   /* TODO: Make this handle MAT_REUSE_MATRIX sensibly; MKL has no notion of this, but we still need to do the right thing. */
690   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,C);CHKERRQ(ierr);
691 
692   PetscFunctionReturn(0);
693 }
694 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
695 
696 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
697 {
698   PetscErrorCode ierr;
699 
700   PetscFunctionBegin;
701   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
702   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
707 {
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
712   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
717 {
718   PetscErrorCode ierr;
719 
720   PetscFunctionBegin;
721   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
722   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
727 {
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
732   if (str == SAME_NONZERO_PATTERN) {
733     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
734     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
740  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
741  * routine, but can also be used to convert an assembled SeqAIJ matrix
742  * into a SeqAIJMKL one. */
743 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
744 {
745   PetscErrorCode ierr;
746   Mat            B = *newmat;
747   Mat_SeqAIJMKL  *aijmkl;
748   PetscBool      set;
749   PetscBool      sametype;
750 
751   PetscFunctionBegin;
752   if (reuse == MAT_INITIAL_MATRIX) {
753     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
754   }
755 
756   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
757   if (sametype) PetscFunctionReturn(0);
758 
759   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
760   B->spptr = (void*) aijmkl;
761 
762   /* Set function pointers for methods that we inherit from AIJ but override.
763    * We also parse some command line options below, since those determine some of the methods we point to. */
764   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
765   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
766   B->ops->destroy          = MatDestroy_SeqAIJMKL;
767 
768   aijmkl->sparse_optimized = PETSC_FALSE;
769 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
770   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
771 #else
772   aijmkl->no_SpMV2 = PETSC_TRUE;
773 #endif
774   aijmkl->eager_inspection = PETSC_FALSE;
775 
776   /* Parse command line options. */
777   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
778   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
779   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
780   ierr = PetscOptionsEnd();CHKERRQ(ierr);
781 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
782   if(!aijmkl->no_SpMV2) {
783     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
784     aijmkl->no_SpMV2 = PETSC_TRUE;
785   }
786 #endif
787 
788   if(!aijmkl->no_SpMV2) {
789 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
790     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
791     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
792     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
793     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
794     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
795 #endif
796   } else {
797     B->ops->mult             = MatMult_SeqAIJMKL;
798     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
799     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
800     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
801   }
802 
803   B->ops->scale              = MatScale_SeqAIJMKL;
804   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
805   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
806   B->ops->axpy               = MatAXPY_SeqAIJMKL;
807 
808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
809   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
813   if(!aijmkl->no_SpMV2) {
814 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
815     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
816 #endif
817   }
818 
819   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
820   *newmat = B;
821   PetscFunctionReturn(0);
822 }
823 
824 /*@C
825    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
826    This type inherits from AIJ and is largely identical, but uses sparse BLAS
827    routines from Intel MKL whenever possible.
828    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
829    operations are currently supported.
830    If the installed version of MKL supports the "SpMV2" sparse
831    inspector-executor routines, then those are used by default.
832 
833    Collective on MPI_Comm
834 
835    Input Parameters:
836 +  comm - MPI communicator, set to PETSC_COMM_SELF
837 .  m - number of rows
838 .  n - number of columns
839 .  nz - number of nonzeros per row (same for all rows)
840 -  nnz - array containing the number of nonzeros in the various rows
841          (possibly different for each row) or NULL
842 
843    Output Parameter:
844 .  A - the matrix
845 
846    Options Database Keys:
847 .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
848 
849    Notes:
850    If nnz is given then nz is ignored
851 
852    Level: intermediate
853 
854 .keywords: matrix, MKL, sparse, parallel
855 
856 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
857 @*/
858 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
859 {
860   PetscErrorCode ierr;
861 
862   PetscFunctionBegin;
863   ierr = MatCreate(comm,A);CHKERRQ(ierr);
864   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
865   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
866   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
871 {
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
876   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879