xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 6c87cf429fe62f7a02f26ddab3797caaa02a84b7)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->scale            = MatScale_SeqAIJ;
52   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
53   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
54   B->ops->axpy             = MatAXPY_SeqAIJ;
55 
56   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   if(!aijmkl->no_SpMV2) {
61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
70   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
71 
72   if (aijmkl->sparse_optimized) {
73     sparse_status_t stat;
74     stat = mkl_sparse_destroy(aijmkl->csrA);
75     if (stat != SPARSE_STATUS_SUCCESS) {
76       PetscFunctionReturn(PETSC_ERR_LIB);
77     }
78   }
79 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
80   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
81 
82   /* Change the type of B to MATSEQAIJ. */
83   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
84 
85   *newmat = B;
86   PetscFunctionReturn(0);
87 }
88 
89 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
90 {
91   PetscErrorCode ierr;
92   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
93 
94   PetscFunctionBegin;
95 
96   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
97    * spptr pointer. */
98   if (aijmkl) {
99     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
100 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
101     if (aijmkl->sparse_optimized) {
102       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
103       stat = mkl_sparse_destroy(aijmkl->csrA);
104       if (stat != SPARSE_STATUS_SUCCESS) {
105         PetscFunctionReturn(PETSC_ERR_LIB);
106       }
107     }
108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
109     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
110   }
111 
112   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
113    * to destroy everything that remains. */
114   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
115   /* Note that I don't call MatSetType().  I believe this is because that
116    * is only to be called when *building* a matrix.  I could be wrong, but
117    * that is how things work for the SuperLU matrix class. */
118   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
123  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
124  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
125  * handle, creates a new one, and then calls mkl_sparse_optimize().
126  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
127  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
128  * an unoptimized matrix handle here. */
129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
130 {
131 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
132   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
133    * does nothing. We make it callable anyway in this case because it cuts
134    * down on littering the code with #ifdefs. */
135   PetscFunctionBegin;
136   PetscFunctionReturn(0);
137 #else
138   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
139   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
140   PetscInt        m,n;
141   MatScalar       *aa;
142   PetscInt        *aj,*ai;
143   sparse_status_t stat;
144 
145   PetscFunctionBegin;
146   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
147 
148   if (aijmkl->sparse_optimized) {
149     /* Matrix has been previously assembled and optimized. Must destroy old
150      * matrix handle before running the optimization step again. */
151     stat = mkl_sparse_destroy(aijmkl->csrA);
152     if (stat != SPARSE_STATUS_SUCCESS) {
153       PetscFunctionReturn(PETSC_ERR_LIB);
154     }
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if ((a->nz!=0) & !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
173     stat = mkl_sparse_optimize(aijmkl->csrA);
174     if (stat != SPARSE_STATUS_SUCCESS) {
175       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
176       PetscFunctionReturn(PETSC_ERR_LIB);
177     }
178     aijmkl->sparse_optimized = PETSC_TRUE;
179   }
180 
181   PetscFunctionReturn(0);
182 #endif
183 }
184 
185 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
186  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
187  * matrix handle.
188  * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity
189  * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult()
190  * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case.
191  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
192  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
193  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
194  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
195 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
196 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
197 {
198   PetscErrorCode ierr;
199   sparse_status_t stat;
200   sparse_index_base_t indexing;
201   PetscInt nrows, ncols;
202   PetscInt *aj,*ai,*dummy;
203   MatScalar *aa;
204   Mat A;
205   Mat_SeqAIJ *a;
206   Mat_SeqAIJMKL *aijmkl;
207 
208   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
209   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
210   if (stat != SPARSE_STATUS_SUCCESS) {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
212     PetscFunctionReturn(PETSC_ERR_LIB);
213   }
214 
215   if (reuse == MAT_INITIAL_MATRIX) {
216     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
217     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
218     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
219     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
220 
221     /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
222      * Now turn it into a MATSEQAIJMKL. */
223     ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
224   } else {
225     A = *mat;
226   }
227 
228   a = (Mat_SeqAIJ*)A->data;
229   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
230 
231   if (reuse == MAT_REUSE_MATRIX) {
232     /* Need to destroy old MKL handle. */
233     stat = mkl_sparse_destroy(aijmkl->csrA);
234     if (stat != SPARSE_STATUS_SUCCESS) {
235       PetscFunctionReturn(PETSC_ERR_LIB);
236     }
237 
238     /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */
239     ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr);
240   }
241   aijmkl->csrA = csrA;
242 
243   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
244    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
245    * and just need to be able to run the MKL optimization step. */
246   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
247   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
248   stat = mkl_sparse_optimize(aijmkl->csrA);
249   if (stat != SPARSE_STATUS_SUCCESS) {
250     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
251     PetscFunctionReturn(PETSC_ERR_LIB);
252   }
253   aijmkl->sparse_optimized = PETSC_TRUE;
254 
255   *mat = A;
256   PetscFunctionReturn(0);
257 }
258 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
259 
260 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
261 {
262   PetscErrorCode ierr;
263   Mat_SeqAIJMKL *aijmkl;
264   Mat_SeqAIJMKL *aijmkl_dest;
265 
266   PetscFunctionBegin;
267   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
268   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
269   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
270   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
271   aijmkl_dest->sparse_optimized = PETSC_FALSE;
272   if (aijmkl->eager_inspection) {
273     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
279 {
280   PetscErrorCode  ierr;
281   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
282   Mat_SeqAIJMKL *aijmkl;
283 
284   PetscFunctionBegin;
285   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
286 
287   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
288    * extra information and some different methods, call the AssemblyEnd
289    * routine for a MATSEQAIJ.
290    * I'm not sure if this is the best way to do this, but it avoids
291    * a lot of code duplication. */
292   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
293   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
294 
295   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
296    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
297   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
298   if (aijmkl->eager_inspection) {
299     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
300   }
301 
302   PetscFunctionReturn(0);
303 }
304 
305 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
306 {
307   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
308   const PetscScalar *x;
309   PetscScalar       *y;
310   const MatScalar   *aa;
311   PetscErrorCode    ierr;
312   PetscInt          m=A->rmap->n;
313   PetscInt          n=A->cmap->n;
314   PetscScalar       alpha = 1.0;
315   PetscScalar       beta = 0.0;
316   const PetscInt    *aj,*ai;
317   char              matdescra[6];
318 
319 
320   /* Variables not in MatMult_SeqAIJ. */
321   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
322 
323   PetscFunctionBegin;
324   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
325   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
326   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
327   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
328   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
329   aa   = a->a;  /* Nonzero elements stored row-by-row. */
330   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
331 
332   /* Call MKL sparse BLAS routine to do the MatMult. */
333   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
334 
335   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
336   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
337   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
342 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
343 {
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
345   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
346   const PetscScalar *x;
347   PetscScalar       *y;
348   PetscErrorCode    ierr;
349   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
350 
351   PetscFunctionBegin;
352 
353   /* If there are no nonzero entries, zero yy and return immediately. */
354   if(!a->nz) {
355     PetscInt i;
356     PetscInt m=A->rmap->n;
357     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
358     for (i=0; i<m; i++) {
359       y[i] = 0.0;
360     }
361     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
362     PetscFunctionReturn(0);
363   }
364 
365   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
366   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
367 
368   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
369    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
370    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
371   if (!aijmkl->sparse_optimized) {
372     MatSeqAIJMKL_create_mkl_handle(A);
373   }
374 
375   /* Call MKL SpMV2 executor routine to do the MatMult. */
376   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
377 
378   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
379   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
380   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
381   if (stat != SPARSE_STATUS_SUCCESS) {
382     PetscFunctionReturn(PETSC_ERR_LIB);
383   }
384   PetscFunctionReturn(0);
385 }
386 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
387 
388 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
389 {
390   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
391   const PetscScalar *x;
392   PetscScalar       *y;
393   const MatScalar   *aa;
394   PetscErrorCode    ierr;
395   PetscInt          m=A->rmap->n;
396   PetscInt          n=A->cmap->n;
397   PetscScalar       alpha = 1.0;
398   PetscScalar       beta = 0.0;
399   const PetscInt    *aj,*ai;
400   char              matdescra[6];
401 
402   /* Variables not in MatMultTranspose_SeqAIJ. */
403   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
404 
405   PetscFunctionBegin;
406   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
407   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
408   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
409   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
410   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
411   aa   = a->a;  /* Nonzero elements stored row-by-row. */
412   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
413 
414   /* Call MKL sparse BLAS routine to do the MatMult. */
415   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
416 
417   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
418   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
419   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
424 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
425 {
426   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
427   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
428   const PetscScalar *x;
429   PetscScalar       *y;
430   PetscErrorCode    ierr;
431   sparse_status_t   stat;
432 
433   PetscFunctionBegin;
434 
435   /* If there are no nonzero entries, zero yy and return immediately. */
436   if(!a->nz) {
437     PetscInt i;
438     PetscInt n=A->cmap->n;
439     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
440     for (i=0; i<n; i++) {
441       y[i] = 0.0;
442     }
443     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
444     PetscFunctionReturn(0);
445   }
446 
447   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
448   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
449 
450   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
451    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
452    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
453   if (!aijmkl->sparse_optimized) {
454     MatSeqAIJMKL_create_mkl_handle(A);
455   }
456 
457   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
458   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
459 
460   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
461   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
462   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
463   if (stat != SPARSE_STATUS_SUCCESS) {
464     PetscFunctionReturn(PETSC_ERR_LIB);
465   }
466   PetscFunctionReturn(0);
467 }
468 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
469 
470 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
471 {
472   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
473   const PetscScalar *x;
474   PetscScalar       *y,*z;
475   const MatScalar   *aa;
476   PetscErrorCode    ierr;
477   PetscInt          m=A->rmap->n;
478   PetscInt          n=A->cmap->n;
479   const PetscInt    *aj,*ai;
480   PetscInt          i;
481 
482   /* Variables not in MatMultAdd_SeqAIJ. */
483   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
484   PetscScalar       alpha = 1.0;
485   PetscScalar       beta;
486   char              matdescra[6];
487 
488   PetscFunctionBegin;
489   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
490   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
491 
492   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
493   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
494   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
495   aa   = a->a;  /* Nonzero elements stored row-by-row. */
496   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
497 
498   /* Call MKL sparse BLAS routine to do the MatMult. */
499   if (zz == yy) {
500     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
501     beta = 1.0;
502     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
503   } else {
504     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
505      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
506     beta = 0.0;
507     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
508     for (i=0; i<m; i++) {
509       z[i] += y[i];
510     }
511   }
512 
513   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
514   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
515   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
520 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
521 {
522   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
523   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
524   const PetscScalar *x;
525   PetscScalar       *y,*z;
526   PetscErrorCode    ierr;
527   PetscInt          m=A->rmap->n;
528   PetscInt          i;
529 
530   /* Variables not in MatMultAdd_SeqAIJ. */
531   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
532 
533   PetscFunctionBegin;
534 
535   /* If there are no nonzero entries, set zz = yy and return immediately. */
536   if(!a->nz) {
537     PetscInt i;
538     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
539     for (i=0; i<m; i++) {
540       z[i] = y[i];
541     }
542     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
543     PetscFunctionReturn(0);
544   }
545 
546   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
547   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
548 
549   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
550    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
551    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
552   if (!aijmkl->sparse_optimized) {
553     MatSeqAIJMKL_create_mkl_handle(A);
554   }
555 
556   /* Call MKL sparse BLAS routine to do the MatMult. */
557   if (zz == yy) {
558     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
559      * with alpha and beta both set to 1.0. */
560     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
561   } else {
562     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
563      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
564     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
565     for (i=0; i<m; i++) {
566       z[i] += y[i];
567     }
568   }
569 
570   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
571   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
572   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
573   if (stat != SPARSE_STATUS_SUCCESS) {
574     PetscFunctionReturn(PETSC_ERR_LIB);
575   }
576   PetscFunctionReturn(0);
577 }
578 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
579 
580 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
581 {
582   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
583   const PetscScalar *x;
584   PetscScalar       *y,*z;
585   const MatScalar   *aa;
586   PetscErrorCode    ierr;
587   PetscInt          m=A->rmap->n;
588   PetscInt          n=A->cmap->n;
589   const PetscInt    *aj,*ai;
590   PetscInt          i;
591 
592   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
594   PetscScalar       alpha = 1.0;
595   PetscScalar       beta;
596   char              matdescra[6];
597 
598   PetscFunctionBegin;
599   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
600   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
601 
602   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
603   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
604   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
605   aa   = a->a;  /* Nonzero elements stored row-by-row. */
606   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
607 
608   /* Call MKL sparse BLAS routine to do the MatMult. */
609   if (zz == yy) {
610     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611     beta = 1.0;
612     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
613   } else {
614     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616     beta = 0.0;
617     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
618     for (i=0; i<n; i++) {
619       z[i] += y[i];
620     }
621   }
622 
623   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
624   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
625   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
630 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
633   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
634   const PetscScalar *x;
635   PetscScalar       *y,*z;
636   PetscErrorCode    ierr;
637   PetscInt          n=A->cmap->n;
638   PetscInt          i;
639 
640   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
641   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
642 
643   PetscFunctionBegin;
644 
645   /* If there are no nonzero entries, set zz = yy and return immediately. */
646   if(!a->nz) {
647     PetscInt i;
648     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
649     for (i=0; i<n; i++) {
650       z[i] = y[i];
651     }
652     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
653     PetscFunctionReturn(0);
654   }
655 
656   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
657   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
658 
659   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
660    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
661    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
662   if (!aijmkl->sparse_optimized) {
663     MatSeqAIJMKL_create_mkl_handle(A);
664   }
665 
666   /* Call MKL sparse BLAS routine to do the MatMult. */
667   if (zz == yy) {
668     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
669      * with alpha and beta both set to 1.0. */
670     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
671   } else {
672     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
673      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
674     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
675     for (i=0; i<n; i++) {
676       z[i] += y[i];
677     }
678   }
679 
680   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
681   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
682   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
683   if (stat != SPARSE_STATUS_SUCCESS) {
684     PetscFunctionReturn(PETSC_ERR_LIB);
685   }
686   PetscFunctionReturn(0);
687 }
688 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
689 
690 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
691 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
692 {
693   Mat_SeqAIJMKL *a, *b;
694   sparse_matrix_t csrA, csrB, csrC;
695   PetscErrorCode ierr;
696   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
697 
698   PetscFunctionBegin;
699   a = (Mat_SeqAIJMKL*)A->spptr;
700   b = (Mat_SeqAIJMKL*)B->spptr;
701   if (!a->sparse_optimized) {
702     MatSeqAIJMKL_create_mkl_handle(A);
703   }
704   if (!b->sparse_optimized) {
705     MatSeqAIJMKL_create_mkl_handle(B);
706   }
707   csrA = a->csrA;
708   csrB = b->csrA;
709 
710   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
711   if (stat != SPARSE_STATUS_SUCCESS) {
712     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
713     PetscFunctionReturn(PETSC_ERR_LIB);
714   }
715 
716   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
717 
718   PetscFunctionReturn(0);
719 }
720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
721 
722 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
723 {
724   PetscErrorCode ierr;
725 
726   PetscFunctionBegin;
727   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
728   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
733 {
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
738   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
743 {
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
748   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
753 {
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
758   if (str == SAME_NONZERO_PATTERN) {
759     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
760     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
761   }
762   PetscFunctionReturn(0);
763 }
764 
765 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
766  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
767  * routine, but can also be used to convert an assembled SeqAIJ matrix
768  * into a SeqAIJMKL one. */
769 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
770 {
771   PetscErrorCode ierr;
772   Mat            B = *newmat;
773   Mat_SeqAIJMKL  *aijmkl;
774   PetscBool      set;
775   PetscBool      sametype;
776 
777   PetscFunctionBegin;
778   if (reuse == MAT_INITIAL_MATRIX) {
779     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
780   }
781 
782   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
783   if (sametype) PetscFunctionReturn(0);
784 
785   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
786   B->spptr = (void*) aijmkl;
787 
788   /* Set function pointers for methods that we inherit from AIJ but override.
789    * We also parse some command line options below, since those determine some of the methods we point to. */
790   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
791   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
792   B->ops->destroy          = MatDestroy_SeqAIJMKL;
793 
794   aijmkl->sparse_optimized = PETSC_FALSE;
795 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
796   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
797 #else
798   aijmkl->no_SpMV2 = PETSC_TRUE;
799 #endif
800   aijmkl->eager_inspection = PETSC_FALSE;
801 
802   /* Parse command line options. */
803   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
804   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
805   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
807 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
808   if(!aijmkl->no_SpMV2) {
809     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
810     aijmkl->no_SpMV2 = PETSC_TRUE;
811   }
812 #endif
813 
814   if(!aijmkl->no_SpMV2) {
815 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
816     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
817     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
818     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
819     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
820     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
821 #endif
822   } else {
823     B->ops->mult             = MatMult_SeqAIJMKL;
824     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
825     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
826     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
827   }
828 
829   B->ops->scale              = MatScale_SeqAIJMKL;
830   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
831   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
832   B->ops->axpy               = MatAXPY_SeqAIJMKL;
833 
834   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
835   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
836   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
837   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
838   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
839   if(!aijmkl->no_SpMV2) {
840 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
841     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
842 #endif
843   }
844 
845   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
846   *newmat = B;
847   PetscFunctionReturn(0);
848 }
849 
850 /*@C
851    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
852    This type inherits from AIJ and is largely identical, but uses sparse BLAS
853    routines from Intel MKL whenever possible.
854    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
855    operations are currently supported.
856    If the installed version of MKL supports the "SpMV2" sparse
857    inspector-executor routines, then those are used by default.
858 
859    Collective on MPI_Comm
860 
861    Input Parameters:
862 +  comm - MPI communicator, set to PETSC_COMM_SELF
863 .  m - number of rows
864 .  n - number of columns
865 .  nz - number of nonzeros per row (same for all rows)
866 -  nnz - array containing the number of nonzeros in the various rows
867          (possibly different for each row) or NULL
868 
869    Output Parameter:
870 .  A - the matrix
871 
872    Options Database Keys:
873 .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
874 
875    Notes:
876    If nnz is given then nz is ignored
877 
878    Level: intermediate
879 
880 .keywords: matrix, MKL, sparse, parallel
881 
882 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
883 @*/
884 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
885 {
886   PetscErrorCode ierr;
887 
888   PetscFunctionBegin;
889   ierr = MatCreate(comm,A);CHKERRQ(ierr);
890   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
891   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
892   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
897 {
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
902   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905