xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 37eeb8152ec6a2cf24186d3591c2c5de5dfd8fa5)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
typedef struct {
  PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;  /* PETSc object state at the time the MKL handle was last built; used to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;  /* MKL matrix descriptor (type/fill mode/diagonal) passed to the SpMV2 executor routines. */
#endif
} Mat_SeqAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27 
28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
35   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36 #endif
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) {
40     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
41   }
42 
43   /* Reset the original function pointers. */
44   B->ops->duplicate        = MatDuplicate_SeqAIJ;
45   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
46   B->ops->destroy          = MatDestroy_SeqAIJ;
47   B->ops->mult             = MatMult_SeqAIJ;
48   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
49   B->ops->multadd          = MatMultAdd_SeqAIJ;
50   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
51   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
54   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
55   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr);
62 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
63   if(!aijmkl->no_SpMV2) {
64     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
65 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
66     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
67 #endif
68     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
69   }
70 
71   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
72    * simply involves destroying the MKL sparse matrix handle and then freeing
73    * the spptr pointer. */
74   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
75 
76   if (aijmkl->sparse_optimized) {
77     sparse_status_t stat;
78     stat = mkl_sparse_destroy(aijmkl->csrA);
79     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
80   }
81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
82   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
83 
84   /* Change the type of B to MATSEQAIJ. */
85   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
86 
87   *newmat = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
92 {
93   PetscErrorCode ierr;
94   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
95 
96   PetscFunctionBegin;
97 
98   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
99    * spptr pointer. */
100   if (aijmkl) {
101     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
102 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
103     if (aijmkl->sparse_optimized) {
104       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
105       stat = mkl_sparse_destroy(aijmkl->csrA);
106       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
107     }
108 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
109     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
110   }
111 
112   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
113    * to destroy everything that remains. */
114   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
115   /* Note that I don't call MatSetType().  I believe this is because that
116    * is only to be called when *building* a matrix.  I could be wrong, but
117    * that is how things work for the SuperLU matrix class. */
118   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
123  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
124  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
125  * handle, creates a new one, and then calls mkl_sparse_optimize().
126  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
127  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
128  * an unoptimized matrix handle here. */
129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
130 {
131 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
132   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
133    * does nothing. We make it callable anyway in this case because it cuts
134    * down on littering the code with #ifdefs. */
135   PetscFunctionBegin;
136   PetscFunctionReturn(0);
137 #else
138   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
139   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
140   PetscInt         m,n;
141   MatScalar        *aa;
142   PetscInt         *aj,*ai;
143   sparse_status_t  stat;
144   PetscErrorCode   ierr;
145 
146   PetscFunctionBegin;
147 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
148   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
149    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
150    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
151    * mkl_sparse_optimize() later. */
152   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
153 #endif
154 
155   if (aijmkl->sparse_optimized) {
156     /* Matrix has been previously assembled and optimized. Must destroy old
157      * matrix handle before running the optimization step again. */
158     stat = mkl_sparse_destroy(aijmkl->csrA);
159     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
160   }
161   aijmkl->sparse_optimized = PETSC_FALSE;
162 
163   /* Now perform the SpMV2 setup and matrix optimization. */
164   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
165   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
166   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
167   m = A->rmap->n;
168   n = A->cmap->n;
169   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
170   aa   = a->a;  /* Nonzero elements stored row-by-row. */
171   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
172   if ((a->nz!=0) & !(A->structure_only)) {
173     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
174      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
175     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
176     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
177     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
178     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
179     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
180     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
181     if (!aijmkl->no_SpMV2) {
182       stat = mkl_sparse_optimize(aijmkl->csrA);
183       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
184     }
185     aijmkl->sparse_optimized = PETSC_TRUE;
186     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
187   }
188 
189   PetscFunctionReturn(0);
190 #endif
191 }
192 
193 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
194  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
195  * matrix handle.
196  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
197  * there is no good alternative. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Bug fix: PetscFunctionBegin was missing, unbalancing the PETSc debug call stack
   * with the PetscFunctionReturn() below. */
  PetscFunctionBegin;
  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatDestroy(mat);CHKERRQ(ierr);
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  if (!aijmkl->no_SpMV2) {
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  }
  aijmkl->sparse_optimized = PETSC_TRUE;
  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);

  *mat = A;
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
254 
255 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
256  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
257  * MatMatMultNumeric(). */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  /* Bug fix: PetscFunctionBegin was missing, unbalancing the PETSc debug call stack
   * with the PetscFunctionReturn() below. */
  PetscFunctionBegin;
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
295 
296 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
297 {
298   PetscErrorCode ierr;
299   Mat_SeqAIJMKL  *aijmkl;
300   Mat_SeqAIJMKL  *aijmkl_dest;
301 
302   PetscFunctionBegin;
303   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
304   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
305   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
306   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
307   aijmkl_dest->sparse_optimized = PETSC_FALSE;
308   if (aijmkl->eager_inspection) {
309     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
315 {
316   PetscErrorCode  ierr;
317   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
318   Mat_SeqAIJMKL   *aijmkl;
319 
320   PetscFunctionBegin;
321   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
322 
323   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
324    * extra information and some different methods, call the AssemblyEnd
325    * routine for a MATSEQAIJ.
326    * I'm not sure if this is the best way to do this, but it avoids
327    * a lot of code duplication. */
328   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
329   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
330 
331   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
332    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
333   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
334   if (aijmkl->eager_inspection) {
335     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
336   }
337 
338   PetscFunctionReturn(0);
339 }
340 
341 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
342 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
343 {
344   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
345   const PetscScalar *x;
346   PetscScalar       *y;
347   const MatScalar   *aa;
348   PetscErrorCode    ierr;
349   PetscInt          m=A->rmap->n;
350   PetscInt          n=A->cmap->n;
351   PetscScalar       alpha = 1.0;
352   PetscScalar       beta = 0.0;
353   const PetscInt    *aj,*ai;
354   char              matdescra[6];
355 
356 
357   /* Variables not in MatMult_SeqAIJ. */
358   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
359 
360   PetscFunctionBegin;
361   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
362   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
363   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
364   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
365   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
366   aa   = a->a;  /* Nonzero elements stored row-by-row. */
367   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
368 
369   /* Call MKL sparse BLAS routine to do the MatMult. */
370   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
371 
372   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
373   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
374   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
375   PetscFunctionReturn(0);
376 }
377 #endif
378 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes yy = A*xx using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately;
   * MKL does not accept an empty matrix handle. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Bug fix: the error code was previously discarded; check it per PETSc convention. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
425 
426 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
427 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
428 {
429   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
430   const PetscScalar *x;
431   PetscScalar       *y;
432   const MatScalar   *aa;
433   PetscErrorCode    ierr;
434   PetscInt          m=A->rmap->n;
435   PetscInt          n=A->cmap->n;
436   PetscScalar       alpha = 1.0;
437   PetscScalar       beta = 0.0;
438   const PetscInt    *aj,*ai;
439   char              matdescra[6];
440 
441   /* Variables not in MatMultTranspose_SeqAIJ. */
442   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
443 
444   PetscFunctionBegin;
445   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
446   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
447   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
448   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
449   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
450   aa   = a->a;  /* Nonzero elements stored row-by-row. */
451   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
452 
453   /* Call MKL sparse BLAS routine to do the MatMult. */
454   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
455 
456   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
458   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 #endif
462 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes yy = A^T*xx using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, zero yy and return immediately;
   * MKL does not accept an empty matrix handle. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Bug fix: the error code was previously discarded; check it per PETSc convention. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
509 
510 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
511 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
512 {
513   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
514   const PetscScalar *x;
515   PetscScalar       *y,*z;
516   const MatScalar   *aa;
517   PetscErrorCode    ierr;
518   PetscInt          m=A->rmap->n;
519   PetscInt          n=A->cmap->n;
520   const PetscInt    *aj,*ai;
521   PetscInt          i;
522 
523   /* Variables not in MatMultAdd_SeqAIJ. */
524   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
525   PetscScalar       alpha = 1.0;
526   PetscScalar       beta;
527   char              matdescra[6];
528 
529   PetscFunctionBegin;
530   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
531   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
532 
533   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
534   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
535   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
536   aa   = a->a;  /* Nonzero elements stored row-by-row. */
537   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
538 
539   /* Call MKL sparse BLAS routine to do the MatMult. */
540   if (zz == yy) {
541     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
542     beta = 1.0;
543     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
544   } else {
545     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
546      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
547     beta = 0.0;
548     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
549     for (i=0; i<m; i++) {
550       z[i] += y[i];
551     }
552   }
553 
554   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
555   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
556   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 #endif
560 
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes zz = yy + A*xx using the MKL SpMV2 inspector-executor interface. */
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  PetscFunctionBegin;

  /* If there are no nonzero entries, set zz = yy and return immediately;
   * MKL does not accept an empty matrix handle.
   * (Cleanup: the redundant inner declaration of 'i', which shadowed the outer one, was removed.) */
  if (!a->nz) {
    ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    /* Bug fix: the error code was previously discarded; check it per PETSc convention. */
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
  }

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
622 
623 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
624 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
625 {
626   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
627   const PetscScalar *x;
628   PetscScalar       *y,*z;
629   const MatScalar   *aa;
630   PetscErrorCode    ierr;
631   PetscInt          m=A->rmap->n;
632   PetscInt          n=A->cmap->n;
633   const PetscInt    *aj,*ai;
634   PetscInt          i;
635 
636   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
637   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
638   PetscScalar       alpha = 1.0;
639   PetscScalar       beta;
640   char              matdescra[6];
641 
642   PetscFunctionBegin;
643   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
644   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
645 
646   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
647   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
648   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
649   aa   = a->a;  /* Nonzero elements stored row-by-row. */
650   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
651 
652   /* Call MKL sparse BLAS routine to do the MatMult. */
653   if (zz == yy) {
654     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
655     beta = 1.0;
656     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
657   } else {
658     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
659      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
660     beta = 0.0;
661     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
662     for (i=0; i<n; i++) {
663       z[i] += y[i];
664     }
665   }
666 
667   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
668   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
669   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 #endif
673 
674 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
675 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
676 {
677   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
679   const PetscScalar *x;
680   PetscScalar       *y,*z;
681   PetscErrorCode    ierr;
682   PetscInt          n=A->cmap->n;
683   PetscInt          i;
684   PetscObjectState  state;
685 
686   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
687   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
688 
689   PetscFunctionBegin;
690 
691   /* If there are no nonzero entries, set zz = yy and return immediately. */
692   if(!a->nz) {
693     PetscInt i;
694     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
695     for (i=0; i<n; i++) {
696       z[i] = y[i];
697     }
698     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
699     PetscFunctionReturn(0);
700   }
701 
702   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
703   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
704 
705   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
706    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
707    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
708   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
709   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
710     MatSeqAIJMKL_create_mkl_handle(A);
711   }
712 
713   /* Call MKL sparse BLAS routine to do the MatMult. */
714   if (zz == yy) {
715     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
716      * with alpha and beta both set to 1.0. */
717     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
718     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
719   } else {
720     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
721      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
722     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
723     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
724     for (i=0; i<n; i++) {
725       z[i] += y[i];
726     }
727   }
728 
729   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
730   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
731   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
735 
736 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
737 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
738  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
739  * For releases of MKL prior to version 18, update 2:
740  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
741  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
742  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
743  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
744 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
745 {
746   Mat_SeqAIJMKL    *a, *b;
747   sparse_matrix_t  csrA, csrB, csrC;
748   PetscErrorCode   ierr;
749   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
750   PetscObjectState state;
751 
752   PetscFunctionBegin;
753   a = (Mat_SeqAIJMKL*)A->spptr;
754   b = (Mat_SeqAIJMKL*)B->spptr;
755   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
756   if (!a->sparse_optimized || a->state != state) {
757     MatSeqAIJMKL_create_mkl_handle(A);
758   }
759   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
760   if (!b->sparse_optimized || b->state != state) {
761     MatSeqAIJMKL_create_mkl_handle(B);
762   }
763   csrA = a->csrA;
764   csrB = b->csrA;
765 
766   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
767   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
768 
769   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
770 
771   PetscFunctionReturn(0);
772 }
773 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
774 
775 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
776 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
777 {
778   Mat_SeqAIJMKL       *a, *b, *c;
779   sparse_matrix_t     csrA, csrB, csrC;
780   PetscErrorCode      ierr;
781   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
782   struct matrix_descr descr_type_gen;
783   PetscObjectState    state;
784 
785   PetscFunctionBegin;
786   a = (Mat_SeqAIJMKL*)A->spptr;
787   b = (Mat_SeqAIJMKL*)B->spptr;
788   c = (Mat_SeqAIJMKL*)C->spptr;
789   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
790   if (!a->sparse_optimized || a->state != state) {
791     MatSeqAIJMKL_create_mkl_handle(A);
792   }
793   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
794   if (!b->sparse_optimized || b->state != state) {
795     MatSeqAIJMKL_create_mkl_handle(B);
796   }
797   csrA = a->csrA;
798   csrB = b->csrA;
799   csrC = c->csrA;
800   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
801 
802   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
803                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
804                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
805 
806   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
807 
808   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
809   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
810 
811   PetscFunctionReturn(0);
812 }
813 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
814 
815 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
816 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
817 {
818   Mat_SeqAIJMKL    *a, *b;
819   sparse_matrix_t  csrA, csrB, csrC;
820   PetscErrorCode   ierr;
821   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
822   PetscObjectState state;
823 
824   PetscFunctionBegin;
825   a = (Mat_SeqAIJMKL*)A->spptr;
826   b = (Mat_SeqAIJMKL*)B->spptr;
827   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
828   if (!a->sparse_optimized || a->state != state) {
829     MatSeqAIJMKL_create_mkl_handle(A);
830   }
831   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
832   if (!b->sparse_optimized || b->state != state) {
833     MatSeqAIJMKL_create_mkl_handle(B);
834   }
835   csrA = a->csrA;
836   csrB = b->csrA;
837 
838   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
839   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
840 
841   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
842 
843   PetscFunctionReturn(0);
844 }
845 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
846 
847 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
848 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
849 {
850   Mat_SeqAIJMKL       *a, *p, *c;
851   sparse_matrix_t     csrA, csrP, csrC;
852   PetscBool           set, flag;
853   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
854   struct matrix_descr descr_type_sym;
855   PetscObjectState    state;
856   PetscErrorCode      ierr;
857 
858   PetscFunctionBegin;
859   ierr = MatIsSymmetricKnown(A,&set,&flag);
860   if (!set || (set && !flag)) {
861     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
862     PetscFunctionReturn(0);
863   }
864 
865   a = (Mat_SeqAIJMKL*)A->spptr;
866   p = (Mat_SeqAIJMKL*)P->spptr;
867   c = (Mat_SeqAIJMKL*)C->spptr;
868   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
869   if (!a->sparse_optimized || a->state != state) {
870     MatSeqAIJMKL_create_mkl_handle(A);
871   }
872   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
873   if (!p->sparse_optimized || p->state != state) {
874     MatSeqAIJMKL_create_mkl_handle(P);
875   }
876   csrA = a->csrA;
877   csrP = p->csrA;
878   csrC = c->csrA;
879   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
880   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
881   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
882 
883   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
884   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
885   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
886 
887   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
888   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 #endif
893 
894 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
895 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
896 {
897   Mat_SeqAIJMKL       *a, *p;
898   sparse_matrix_t     csrA, csrP, csrC;
899   PetscBool           set, flag;
900   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
901   struct matrix_descr descr_type_sym;
902   PetscObjectState    state;
903   PetscErrorCode      ierr;
904 
905   PetscFunctionBegin;
906   ierr = MatIsSymmetricKnown(A,&set,&flag);
907   if (!set || (set && !flag)) {
908     ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
909     PetscFunctionReturn(0);
910   }
911 
912   if (scall == MAT_REUSE_MATRIX) {
913     ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
914     PetscFunctionReturn(0);
915   }
916 
917   a = (Mat_SeqAIJMKL*)A->spptr;
918   p = (Mat_SeqAIJMKL*)P->spptr;
919   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
920   if (!a->sparse_optimized || a->state != state) {
921     MatSeqAIJMKL_create_mkl_handle(A);
922   }
923   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
924   if (!p->sparse_optimized || p->state != state) {
925     MatSeqAIJMKL_create_mkl_handle(P);
926   }
927   csrA = a->csrA;
928   csrP = p->csrA;
929   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
930   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
931   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
932 
933   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
934   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
935   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");
936 
937   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
938   ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
939 
940   PetscFunctionReturn(0);
941 }
942 #endif
943 
944 /* This function prototype is needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
945 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
946 
947 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
948  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
949  * routine, but can also be used to convert an assembled SeqAIJ matrix
950  * into a SeqAIJMKL one. */
951 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
952 {
953   PetscErrorCode ierr;
954   Mat            B = *newmat;
955   Mat_SeqAIJMKL  *aijmkl;
956   PetscBool      set;
957   PetscBool      sametype;
958 
959   PetscFunctionBegin;
960   if (reuse == MAT_INITIAL_MATRIX) {
961     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
962   }
963 
964   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
965   if (sametype) PetscFunctionReturn(0);
966 
967   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
968   B->spptr = (void*) aijmkl;
969 
970   /* Set function pointers for methods that we inherit from AIJ but override.
971    * We also parse some command line options below, since those determine some of the methods we point to. */
972   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
973   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
974   B->ops->destroy          = MatDestroy_SeqAIJMKL;
975 
976   aijmkl->sparse_optimized = PETSC_FALSE;
977 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
978   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
979 #else
980   aijmkl->no_SpMV2 = PETSC_TRUE;
981 #endif
982   aijmkl->eager_inspection = PETSC_FALSE;
983 
984   /* Parse command line options. */
985   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
986   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
987   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
988   ierr = PetscOptionsEnd();CHKERRQ(ierr);
989 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
990   if(!aijmkl->no_SpMV2) {
991     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
992     aijmkl->no_SpMV2 = PETSC_TRUE;
993   }
994 #endif
995 
996 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
997   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
998   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
999   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1000   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1001   B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1002 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1003   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1004 #   if !defined(PETSC_USE_COMPLEX)
1005   B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
1006   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1007 #   endif
1008 # endif
1009   B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1010 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1011 
1012 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1013   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1014    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1015    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1016    * versions in which the older interface has not been deprecated, we use the old interface. */
1017   if (aijmkl->no_SpMV2) {
1018     B->ops->mult             = MatMult_SeqAIJMKL;
1019     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1020     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1021     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1022   }
1023 #endif
1024 
1025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
1027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
1028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
1029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
1030   if(!aijmkl->no_SpMV2) {
1031 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1032     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1033 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1034     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1035 #endif
1036     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1037 #endif
1038   }
1039 
1040   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1041   *newmat = B;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 /*@C
1046    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1047    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1048    routines from Intel MKL whenever possible.
1049    If the installed version of MKL supports the "SpMV2" sparse
1050    inspector-executor routines, then those are used by default.
1051    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1052    symmetric A) operations are currently supported.
1053    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1054 
1055    Collective
1056 
1057    Input Parameters:
1058 +  comm - MPI communicator, set to PETSC_COMM_SELF
1059 .  m - number of rows
1060 .  n - number of columns
1061 .  nz - number of nonzeros per row (same for all rows)
1062 -  nnz - array containing the number of nonzeros in the various rows
1063          (possibly different for each row) or NULL
1064 
1065    Output Parameter:
1066 .  A - the matrix
1067 
1068    Options Database Keys:
1069 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1070 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1071 
1072    Notes:
1073    If nnz is given then nz is ignored
1074 
1075    Level: intermediate
1076 
1077 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1078 @*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);  /* Sequential matrix: local sizes equal global sizes. */
  ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
  /* Preallocate via the base SeqAIJ routine; if nnz is given it overrides nz (see man page above). */
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1090 
/* Constructor registered for MATSEQAIJMKL: builds a plain SEQAIJ matrix,
 * then converts it in place to the MKL-accelerated type. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  /* MAT_INPLACE_MATRIX: A itself is mutated into a SeqAIJMKL matrix; no new Mat is created. */
  ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1100