xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 190ae7a464f1af264fdb4f847bcdc9a1e70f793c)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate               = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy                 = MatDestroy_SeqAIJ;
46   B->ops->mult                    = MatMult_SeqAIJ;
47   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
48   B->ops->multadd                 = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
50   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
51   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58 
59 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60   if (!aijmkl->no_SpMV2) {
61 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
62     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64   }
65 
66   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
67    * simply involves destroying the MKL sparse matrix handle and then freeing
68    * the spptr pointer. */
69   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
70 
71   if (aijmkl->sparse_optimized) {
72     sparse_status_t stat;
73     stat = mkl_sparse_destroy(aijmkl->csrA);
74     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
75   }
76 #endif
77   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
78 
79   /* Change the type of B to MATSEQAIJ. */
80   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
81 
82   *newmat = B;
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
87 {
88   PetscErrorCode ierr;
89   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
90 
91   PetscFunctionBegin;
92 
93   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
94    * spptr pointer. */
95   if (aijmkl) {
96     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
97 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
98     if (aijmkl->sparse_optimized) {
99       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
100       stat = mkl_sparse_destroy(aijmkl->csrA);
101       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
102     }
103 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
104     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
105   }
106 
107   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
108    * to destroy everything that remains. */
109   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
110   /* Note that I don't call MatSetType().  I believe this is because that
111    * is only to be called when *building* a matrix.  I could be wrong, but
112    * that is how things work for the SuperLU matrix class. */
113   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
118  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
119  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
120  * handle, creates a new one, and then calls mkl_sparse_optimize().
121  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
122  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
123  * an unoptimized matrix handle here. */
124 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
125 {
126 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
127   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
128    * does nothing. We make it callable anyway in this case because it cuts
129    * down on littering the code with #ifdefs. */
130   PetscFunctionBegin;
131   PetscFunctionReturn(0);
132 #else
133   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
134   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
135   PetscInt         m,n;
136   MatScalar        *aa;
137   PetscInt         *aj,*ai;
138   sparse_status_t  stat;
139   PetscErrorCode   ierr;
140 
141   PetscFunctionBegin;
142 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
143   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
144    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
145    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
146    * mkl_sparse_optimize() later. */
147   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
148 #endif
149 
150   if (aijmkl->sparse_optimized) {
151     /* Matrix has been previously assembled and optimized. Must destroy old
152      * matrix handle before running the optimization step again. */
153     stat = mkl_sparse_destroy(aijmkl->csrA);
154     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if ((a->nz!=0) && aa && !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
172     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
173     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
174     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
176     if (!aijmkl->no_SpMV2) {
177       stat = mkl_sparse_optimize(aijmkl->csrA);
178       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
179     }
180     aijmkl->sparse_optimized = PETSC_TRUE;
181     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
182   } else {
183     aijmkl->csrA = PETSC_NULL;
184   }
185 
186   PetscFunctionReturn(0);
187 #endif
188 }
189 
190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
192 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)
193 {
194   PetscErrorCode      ierr;
195   sparse_status_t     stat;
196   sparse_index_base_t indexing;
197   PetscInt            m,n;
198   PetscInt            *aj,*ai,*dummy;
199   MatScalar           *aa;
200   Mat_SeqAIJMKL       *aijmkl;
201 
202   if (csrA) {
203     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204     stat = mkl_sparse_x_export_csr(csrA,&indexing,&m,&n,&ai,&dummy,&aj,&aa);
205     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
206     if ((m != nrows) || (n != ncols)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
207   } else {
208     aj = ai = PETSC_NULL;
209     aa = PETSC_NULL;
210   }
211 
212   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
213   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216    * they will be destroyed when the MKL handle is destroyed.
217    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218   if (csrA) {
219     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,NULL);CHKERRQ(ierr);
220   } else {
221     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
222     ierr = MatSetUp(A);CHKERRQ(ierr);
223     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   }
226 
227   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
228    * Now turn it into a MATSEQAIJMKL. */
229   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
230 
231   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
232   aijmkl->csrA = csrA;
233 
234   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
235    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
236    * and just need to be able to run the MKL optimization step. */
237   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
238   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
239   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
240   if (csrA) {
241     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
242     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
243     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
244     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
245   }
246   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
247 
248   PetscFunctionReturn(0);
249 }
250 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
251 
252 
253 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
254  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
255  * MatMatMultNumeric(). */
256 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
257 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
258 {
259   PetscInt            i;
260   PetscInt            nrows,ncols;
261   PetscInt            nz;
262   PetscInt            *ai,*aj,*dummy;
263   PetscScalar         *aa;
264   PetscErrorCode      ierr;
265   Mat_SeqAIJMKL       *aijmkl;
266   sparse_status_t     stat;
267   sparse_index_base_t indexing;
268 
269   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
270 
271   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
272   if (!aijmkl->csrA) PetscFunctionReturn(0);
273 
274   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
275   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
276   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
277 
278   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
279    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
280   for (i=0; i<nrows; i++) {
281     nz = ai[i+1] - ai[i];
282     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
283   }
284 
285   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
286   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287 
288   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
289   /* We mark our matrix as having a valid, optimized MKL handle.
290    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
291   aijmkl->sparse_optimized = PETSC_TRUE;
292 
293   PetscFunctionReturn(0);
294 }
295 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
296 
297 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
298 {
299   PetscErrorCode ierr;
300   Mat_SeqAIJMKL  *aijmkl;
301   Mat_SeqAIJMKL  *aijmkl_dest;
302 
303   PetscFunctionBegin;
304   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
305   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
306   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
307   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
308   aijmkl_dest->sparse_optimized = PETSC_FALSE;
309   if (aijmkl->eager_inspection) {
310     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
316 {
317   PetscErrorCode  ierr;
318   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
319   Mat_SeqAIJMKL   *aijmkl;
320 
321   PetscFunctionBegin;
322   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
323 
324   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
325    * extra information and some different methods, call the AssemblyEnd
326    * routine for a MATSEQAIJ.
327    * I'm not sure if this is the best way to do this, but it avoids
328    * a lot of code duplication. */
329   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
330   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
331 
332   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
333    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
334   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
335   if (aijmkl->eager_inspection) {
336     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
337   }
338 
339   PetscFunctionReturn(0);
340 }
341 
342 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
343 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
344 {
345   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
346   const PetscScalar *x;
347   PetscScalar       *y;
348   const MatScalar   *aa;
349   PetscErrorCode    ierr;
350   PetscInt          m = A->rmap->n;
351   PetscInt          n = A->cmap->n;
352   PetscScalar       alpha = 1.0;
353   PetscScalar       beta = 0.0;
354   const PetscInt    *aj,*ai;
355   char              matdescra[6];
356 
357 
358   /* Variables not in MatMult_SeqAIJ. */
359   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
360 
361   PetscFunctionBegin;
362   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
363   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
364   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
365   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
366   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
367   aa   = a->a;  /* Nonzero elements stored row-by-row. */
368   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
369 
370   /* Call MKL sparse BLAS routine to do the MatMult. */
371   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
372 
373   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
374   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
375   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
376   PetscFunctionReturn(0);
377 }
378 #endif
379 
380 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
381 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
382 {
383   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
384   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
385   const PetscScalar *x;
386   PetscScalar       *y;
387   PetscErrorCode    ierr;
388   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
389   PetscObjectState  state;
390 
391   PetscFunctionBegin;
392 
393   /* If there are no nonzero entries, zero yy and return immediately. */
394   if(!a->nz) {
395     PetscInt i;
396     PetscInt m=A->rmap->n;
397     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398     for (i=0; i<m; i++) {
399       y[i] = 0.0;
400     }
401     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
402     PetscFunctionReturn(0);
403   }
404 
405   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
406   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
407 
408   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
409    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
410    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
411   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
412   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
413     MatSeqAIJMKL_create_mkl_handle(A);
414   }
415 
416   /* Call MKL SpMV2 executor routine to do the MatMult. */
417   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
418   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
419 
420   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
421   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
422   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
426 
427 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
428 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
429 {
430   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
431   const PetscScalar *x;
432   PetscScalar       *y;
433   const MatScalar   *aa;
434   PetscErrorCode    ierr;
435   PetscInt          m = A->rmap->n;
436   PetscInt          n = A->cmap->n;
437   PetscScalar       alpha = 1.0;
438   PetscScalar       beta = 0.0;
439   const PetscInt    *aj,*ai;
440   char              matdescra[6];
441 
442   /* Variables not in MatMultTranspose_SeqAIJ. */
443   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
444 
445   PetscFunctionBegin;
446   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
447   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
448   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
449   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
450   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
451   aa   = a->a;  /* Nonzero elements stored row-by-row. */
452   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
453 
454   /* Call MKL sparse BLAS routine to do the MatMult. */
455   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
456 
457   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
458   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
459   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
460   PetscFunctionReturn(0);
461 }
462 #endif
463 
464 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
465 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
466 {
467   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
468   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
469   const PetscScalar *x;
470   PetscScalar       *y;
471   PetscErrorCode    ierr;
472   sparse_status_t   stat;
473   PetscObjectState  state;
474 
475   PetscFunctionBegin;
476 
477   /* If there are no nonzero entries, zero yy and return immediately. */
478   if(!a->nz) {
479     PetscInt i;
480     PetscInt n=A->cmap->n;
481     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
482     for (i=0; i<n; i++) {
483       y[i] = 0.0;
484     }
485     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
486     PetscFunctionReturn(0);
487   }
488 
489   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
490   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
491 
492   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
493    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
494    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
495   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
496   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
497     MatSeqAIJMKL_create_mkl_handle(A);
498   }
499 
500   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
501   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
502   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
503 
504   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
505   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
506   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
510 
511 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
512 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
513 {
514   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
515   const PetscScalar *x;
516   PetscScalar       *y,*z;
517   const MatScalar   *aa;
518   PetscErrorCode    ierr;
519   PetscInt          m = A->rmap->n;
520   PetscInt          n = A->cmap->n;
521   const PetscInt    *aj,*ai;
522   PetscInt          i;
523 
524   /* Variables not in MatMultAdd_SeqAIJ. */
525   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
526   PetscScalar       alpha = 1.0;
527   PetscScalar       beta;
528   char              matdescra[6];
529 
530   PetscFunctionBegin;
531   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
532   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
533 
534   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
535   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
536   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
537   aa   = a->a;  /* Nonzero elements stored row-by-row. */
538   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
539 
540   /* Call MKL sparse BLAS routine to do the MatMult. */
541   if (zz == yy) {
542     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
543     beta = 1.0;
544     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
545   } else {
546     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
547      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
548     beta = 0.0;
549     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
550     for (i=0; i<m; i++) {
551       z[i] += y[i];
552     }
553   }
554 
555   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
556   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
557   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 #endif
561 
562 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
563 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
566   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
567   const PetscScalar *x;
568   PetscScalar       *y,*z;
569   PetscErrorCode    ierr;
570   PetscInt          m = A->rmap->n;
571   PetscInt          i;
572 
573   /* Variables not in MatMultAdd_SeqAIJ. */
574   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
575   PetscObjectState  state;
576 
577   PetscFunctionBegin;
578 
579   /* If there are no nonzero entries, set zz = yy and return immediately. */
580   if(!a->nz) {
581     PetscInt i;
582     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
583     for (i=0; i<m; i++) {
584       z[i] = y[i];
585     }
586     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
587     PetscFunctionReturn(0);
588   }
589 
590   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
591   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
592 
593   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
594    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
595    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
596   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
597   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
598     MatSeqAIJMKL_create_mkl_handle(A);
599   }
600 
601   /* Call MKL sparse BLAS routine to do the MatMult. */
602   if (zz == yy) {
603     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
604      * with alpha and beta both set to 1.0. */
605     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
606     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
607   } else {
608     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
609      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
610     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
611     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
612     for (i=0; i<m; i++) {
613       z[i] += y[i];
614     }
615   }
616 
617   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
618   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
619   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
623 
624 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
625 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
626 {
627   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
628   const PetscScalar *x;
629   PetscScalar       *y,*z;
630   const MatScalar   *aa;
631   PetscErrorCode    ierr;
632   PetscInt          m = A->rmap->n;
633   PetscInt          n = A->cmap->n;
634   const PetscInt    *aj,*ai;
635   PetscInt          i;
636 
637   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
638   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
639   PetscScalar       alpha = 1.0;
640   PetscScalar       beta;
641   char              matdescra[6];
642 
643   PetscFunctionBegin;
644   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
645   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
646 
647   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
648   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
649   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
650   aa   = a->a;  /* Nonzero elements stored row-by-row. */
651   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
652 
653   /* Call MKL sparse BLAS routine to do the MatMult. */
654   if (zz == yy) {
655     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
656     beta = 1.0;
657     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
658   } else {
659     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
660      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
661     beta = 0.0;
662     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
663     for (i=0; i<n; i++) {
664       z[i] += y[i];
665     }
666   }
667 
668   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
669   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
670   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 #endif
674 
675 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
676 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
679   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
680   const PetscScalar *x;
681   PetscScalar       *y,*z;
682   PetscErrorCode    ierr;
683   PetscInt          n = A->cmap->n;
684   PetscInt          i;
685   PetscObjectState  state;
686 
687   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
688   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
689 
690   PetscFunctionBegin;
691 
692   /* If there are no nonzero entries, set zz = yy and return immediately. */
693   if(!a->nz) {
694     PetscInt i;
695     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
696     for (i=0; i<n; i++) {
697       z[i] = y[i];
698     }
699     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
700     PetscFunctionReturn(0);
701   }
702 
703   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
704   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
705 
706   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
707    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
708    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
709   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
710   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
711     MatSeqAIJMKL_create_mkl_handle(A);
712   }
713 
714   /* Call MKL sparse BLAS routine to do the MatMult. */
715   if (zz == yy) {
716     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
717      * with alpha and beta both set to 1.0. */
718     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
719     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
720   } else {
721     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
722      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
723     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
724     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
725     for (i=0; i<n; i++) {
726       z[i] += y[i];
727     }
728   }
729 
730   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
731   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
732   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
733   PetscFunctionReturn(0);
734 }
735 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
736 
737 /* -------------------------- MatProduct code -------------------------- */
738 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
739 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
740 {
741   Mat_SeqAIJMKL       *a, *b;
742   sparse_matrix_t     csrA, csrB, csrC;
743   PetscInt            nrows,ncols;
744   PetscErrorCode      ierr;
745   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
746   struct matrix_descr descr_type_gen;
747   PetscObjectState    state;
748 
749   PetscFunctionBegin;
750   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
751    * not handle sparse matrices with zero rows or columns. */
752   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
753   else nrows = A->cmap->N;
754   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
755   else ncols = B->rmap->N;
756 
757   a = (Mat_SeqAIJMKL*)A->spptr;
758   b = (Mat_SeqAIJMKL*)B->spptr;
759   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
760   if (!a->sparse_optimized || a->state != state) {
761     MatSeqAIJMKL_create_mkl_handle(A);
762   }
763   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
764   if (!b->sparse_optimized || b->state != state) {
765     MatSeqAIJMKL_create_mkl_handle(B);
766   }
767   csrA = a->csrA;
768   csrB = b->csrA;
769   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
770 
771   if (csrA && csrB) {
772     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FULL_MULT_NO_VAL,&csrC);
773     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete symbolic stage of sparse matrix-matrix multiply");
774   } else {
775     csrC = PETSC_NULL;
776   }
777 
778   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,nrows,ncols,C);CHKERRQ(ierr);
779 
780   PetscFunctionReturn(0);
781 }
782 
783 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)
784 {
785   Mat_SeqAIJMKL       *a, *b, *c;
786   sparse_matrix_t     csrA, csrB, csrC;
787   PetscErrorCode      ierr;
788   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
789   struct matrix_descr descr_type_gen;
790   PetscObjectState    state;
791 
792   PetscFunctionBegin;
793   a = (Mat_SeqAIJMKL*)A->spptr;
794   b = (Mat_SeqAIJMKL*)B->spptr;
795   c = (Mat_SeqAIJMKL*)C->spptr;
796   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
797   if (!a->sparse_optimized || a->state != state) {
798     MatSeqAIJMKL_create_mkl_handle(A);
799   }
800   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
801   if (!b->sparse_optimized || b->state != state) {
802     MatSeqAIJMKL_create_mkl_handle(B);
803   }
804   csrA = a->csrA;
805   csrB = b->csrA;
806   csrC = c->csrA;
807   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
808 
809   if (csrA && csrB) {
810     stat = mkl_sparse_sp2m(transA,descr_type_gen,csrA,transB,descr_type_gen,csrB,SPARSE_STAGE_FINALIZE_MULT,&csrC);
811     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
812   } else {
813     csrC = PETSC_NULL;
814   }
815 
816   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
817   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
818 
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
823 {
824   PetscErrorCode ierr;
825 
826   PetscFunctionBegin;
827   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
832 {
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
837   PetscFunctionReturn(0);
838 }
839 
840 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
841 {
842   PetscErrorCode ierr;
843 
844   PetscFunctionBegin;
845   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
850 {
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_TRANSPOSE,B,SPARSE_OPERATION_NON_TRANSPOSE,C);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)
859 {
860   PetscErrorCode ierr;
861 
862   PetscFunctionBegin;
863   ierr = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)
868 {
869   PetscErrorCode ierr;
870 
871   PetscFunctionBegin;
872   ierr = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A,SPARSE_OPERATION_NON_TRANSPOSE,B,SPARSE_OPERATION_TRANSPOSE,C);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
877 {
878   PetscErrorCode ierr;
879   Mat_Product    *product = C->product;
880   Mat            A = product->A,B = product->B;
881 
882   PetscFunctionBegin;
883   ierr = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A,B,C);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
888 {
889   PetscErrorCode ierr;
890   Mat_Product    *product = C->product;
891   Mat            A = product->A,B = product->B;
892   PetscReal      fill = product->fill;
893 
894   PetscFunctionBegin;
895   ierr = MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A,B,fill,C);CHKERRQ(ierr);
896 
897   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
898   PetscFunctionReturn(0);
899 }
900 
901 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat P,Mat C)
902 {
903   Mat                 Ct;
904   Vec                 zeros;
905   Mat_SeqAIJMKL       *a, *p, *c;
906   sparse_matrix_t     csrA, csrP, csrC;
907   PetscBool           set, flag;
908   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
909   struct matrix_descr descr_type_sym;
910   PetscObjectState    state;
911   PetscErrorCode      ierr;
912 
913   PetscFunctionBegin;
914   ierr = MatIsSymmetricKnown(A,&set,&flag);
915   if (!set || (set && !flag)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL() called on matrix A not marked as symmetric");
916 
917   a = (Mat_SeqAIJMKL*)A->spptr;
918   p = (Mat_SeqAIJMKL*)P->spptr;
919   c = (Mat_SeqAIJMKL*)C->spptr;
920   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
921   if (!a->sparse_optimized || a->state != state) {
922     MatSeqAIJMKL_create_mkl_handle(A);
923   }
924   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
925   if (!p->sparse_optimized || p->state != state) {
926     MatSeqAIJMKL_create_mkl_handle(P);
927   }
928   csrA = a->csrA;
929   csrP = p->csrA;
930   csrC = c->csrA;
931   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
932   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
933   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
934 
935   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
936   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
937   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
938 
939   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
940    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
941    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
942    * We have to fill in the missing portion, which we currently do below by forming the tranpose and performing at MatAXPY
943    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
944    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
945    * the full matrix. */
946   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
947   ierr = MatTranspose(C,MAT_INITIAL_MATRIX,&Ct);CHKERRQ(ierr);
948   ierr = MatCreateVecs(C,&zeros,NULL);CHKERRQ(ierr);
949   ierr = VecSetFromOptions(zeros);CHKERRQ(ierr);
950   ierr = VecZeroEntries(zeros);CHKERRQ(ierr);
951   ierr = MatDiagonalSet(Ct,zeros,INSERT_VALUES);CHKERRQ(ierr);
952   ierr = MatAXPY(C,1.0,Ct,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
953   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
954   ierr = MatProductCreateWithMat(A,P,PETSC_NULL,C);CHKERRQ(ierr);
955   C->product->type = MATPRODUCT_PtAP;
956   ierr = MatSeqAIJMKL_create_mkl_handle(C);CHKERRQ(ierr);
957   ierr = VecDestroy(&zeros);CHKERRQ(ierr);
958   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
959 
960   PetscFunctionReturn(0);
961 }
962 
963 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
964 {
965   Mat_Product         *product = C->product;
966   Mat                 A = product->A,P = product->B;
967   Mat_SeqAIJMKL       *a,*p;
968   sparse_matrix_t     csrA,csrP,csrC;
969   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
970   struct matrix_descr descr_type_sym;
971   PetscObjectState    state;
972   PetscErrorCode      ierr;
973 
974   PetscFunctionBegin;
975   a = (Mat_SeqAIJMKL*)A->spptr;
976   p = (Mat_SeqAIJMKL*)P->spptr;
977   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
978   if (!a->sparse_optimized || a->state != state) {
979     MatSeqAIJMKL_create_mkl_handle(A);
980   }
981   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
982   if (!p->sparse_optimized || p->state != state) {
983     MatSeqAIJMKL_create_mkl_handle(P);
984   }
985   csrA = a->csrA;
986   csrP = p->csrA;
987   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
988   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
989   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
990 
991   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
992   if (csrP && csrA) {
993     stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT_NO_VAL);
994     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to perform symbolic mkl_sparse_sypr");
995   } else {
996     csrC = PETSC_NULL;
997   }
998 
999   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
1000    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
1001    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL(). I believe that leaving things
1002    * in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this is
1003    * guaranteed. */
1004   ierr = MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF,csrC,P->cmap->N,P->cmap->N,C);CHKERRQ(ierr);
1005 
1006   C->ops->productnumeric = MatProductNumeric_PtAP;
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
1011 {
1012   PetscFunctionBegin;
1013   C->ops->productsymbolic = MatProductSymbolic_AB;
1014   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
1019 {
1020   PetscFunctionBegin;
1021   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
1026 {
1027   PetscFunctionBegin;
1028   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1029   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
1034 {
1035   PetscErrorCode ierr;
1036   Mat_Product    *product = C->product;
1037   Mat            A = product->A;
1038   PetscBool      set, flag;
1039 
1040   PetscFunctionBegin;
1041 #if defined(PETSC_USE_COMPLEX)
1042   /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Basic() will be used.
1043    * We do this in several other locations in this file. This works for the time being, but the _Basic()
1044    * routines are considered unsafe and may be removed from the MatProduct code in the future.
1045    * TODO: Add proper MATSEQAIJMKL implementations, instead of relying on the _Basic() routines. */
1046   C->ops->productsymbolic = NULL;
1047 #else
1048   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
1049   ierr = MatIsSymmetricKnown(A,&set,&flag);CHKERRQ(ierr);
1050   if (set && flag) {
1051     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1052     PetscFunctionReturn(0);
1053   } else {
1054     C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1055   }
1056   /* Note that we don't set C->ops->productnumeric here, as this has must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL(),
1057    * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
1058 #endif
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
1063 {
1064   PetscFunctionBegin;
1065   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
1070 {
1071   PetscFunctionBegin;
1072   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Basic() will be used. */
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
1077 {
1078   PetscErrorCode ierr;
1079   Mat_Product    *product = C->product;
1080 
1081   PetscFunctionBegin;
1082   switch (product->type) {
1083   case MATPRODUCT_AB:
1084     ierr = MatProductSetFromOptions_SeqAIJMKL_AB(C);CHKERRQ(ierr);
1085     break;
1086   case MATPRODUCT_AtB:
1087     ierr = MatProductSetFromOptions_SeqAIJMKL_AtB(C);CHKERRQ(ierr);
1088     break;
1089   case MATPRODUCT_ABt:
1090     ierr = MatProductSetFromOptions_SeqAIJMKL_ABt(C);CHKERRQ(ierr);
1091     break;
1092   case MATPRODUCT_PtAP:
1093     ierr = MatProductSetFromOptions_SeqAIJMKL_PtAP(C);CHKERRQ(ierr);
1094     break;
1095   case MATPRODUCT_RARt:
1096     ierr = MatProductSetFromOptions_SeqAIJMKL_RARt(C);CHKERRQ(ierr);
1097     break;
1098   case MATPRODUCT_ABC:
1099     ierr = MatProductSetFromOptions_SeqAIJMKL_ABC(C);CHKERRQ(ierr);
1100     break;
1101   default:
1102     break;
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
1107 /* ------------------------ End MatProduct code ------------------------ */
1108 
1109 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
1110  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1111  * routine, but can also be used to convert an assembled SeqAIJ matrix
1112  * into a SeqAIJMKL one. */
1113 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
1114 {
1115   PetscErrorCode ierr;
1116   Mat            B = *newmat;
1117   Mat_SeqAIJMKL  *aijmkl;
1118   PetscBool      set;
1119   PetscBool      sametype;
1120 
1121   PetscFunctionBegin;
1122   if (reuse == MAT_INITIAL_MATRIX) {
1123     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
1124   }
1125 
1126   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
1127   if (sametype) PetscFunctionReturn(0);
1128 
1129   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
1130   B->spptr = (void*) aijmkl;
1131 
1132   /* Set function pointers for methods that we inherit from AIJ but override.
1133    * We also parse some command line options below, since those determine some of the methods we point to. */
1134   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
1135   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
1136   B->ops->destroy          = MatDestroy_SeqAIJMKL;
1137 
1138   aijmkl->sparse_optimized = PETSC_FALSE;
1139 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1140   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
1141 #else
1142   aijmkl->no_SpMV2 = PETSC_TRUE;
1143 #endif
1144   aijmkl->eager_inspection = PETSC_FALSE;
1145 
1146   /* Parse command line options. */
1147   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1148   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1149   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1150   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1151 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1152   if(!aijmkl->no_SpMV2) {
1153     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1154     aijmkl->no_SpMV2 = PETSC_TRUE;
1155   }
1156 #endif
1157 
1158 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1159   B->ops->mult                    = MatMult_SeqAIJMKL_SpMV2;
1160   B->ops->multtranspose           = MatMultTranspose_SeqAIJMKL_SpMV2;
1161   B->ops->multadd                 = MatMultAdd_SeqAIJMKL_SpMV2;
1162   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1163 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1164   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1165   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1166   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1167   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1168   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1169 #   if !defined(PETSC_USE_COMPLEX)
1170   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL;
1171 #   else
1172   B->ops->ptapnumeric             = NULL;
1173 #   endif
1174 # endif
1175 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1176 
1177 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1178   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1179    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1180    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1181    * versions in which the older interface has not been deprecated, we use the old interface. */
1182   if (aijmkl->no_SpMV2) {
1183     B->ops->mult             = MatMult_SeqAIJMKL;
1184     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1185     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1186     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1187   }
1188 #endif
1189 
1190   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1191 
1192   if(!aijmkl->no_SpMV2) {
1193 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1194 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1195     ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaijmkl_seqaijmkl_C",MatProductSetFromOptions_SeqAIJMKL);CHKERRQ(ierr);
1196 #endif
1197 #endif
1198   }
1199 
1200   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1201   *newmat = B;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /*@C
1206    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1207    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1208    routines from Intel MKL whenever possible.
1209    If the installed version of MKL supports the "SpMV2" sparse
1210    inspector-executor routines, then those are used by default.
1211    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1212    symmetric A) operations are currently supported.
1213    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1214 
1215    Collective
1216 
1217    Input Parameters:
1218 +  comm - MPI communicator, set to PETSC_COMM_SELF
1219 .  m - number of rows
1220 .  n - number of columns
1221 .  nz - number of nonzeros per row (same for all rows)
1222 -  nnz - array containing the number of nonzeros in the various rows
1223          (possibly different for each row) or NULL
1224 
1225    Output Parameter:
1226 .  A - the matrix
1227 
1228    Options Database Keys:
1229 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1230 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1231 
1232    Notes:
1233    If nnz is given then nz is ignored
1234 
1235    Level: intermediate
1236 
1237 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1238 @*/
1239 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1240 {
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1245   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1246   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1247   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1252 {
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1257   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260