xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 257f4e5aa63fce47b93517cb665016a4c8ee7c55)
1 
2 /*
3   Defines basic operations for the MATSEQAIJMKL matrix class.
4   This class is derived from the MATSEQAIJ class and retains the
5   compressed row storage (aka Yale sparse matrix format) but uses
6   sparse BLAS operations from the Intel Math Kernel Library (MKL)
7   wherever possible.
8 */
9 
10 #include <../src/mat/impls/aij/seq/aij.h>
11 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12 #include <mkl_spblas.h>
13 
14 typedef struct {
15   PetscBool           no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
16   PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
17   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   PetscObjectState    state;
19 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
20   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
52   B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
53   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
54   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
55 
56   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",NULL);CHKERRQ(ierr);
61 #if defined(PETSC_HAVE_HYPRE)
62   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #endif
64 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
65   if(!aijmkl->no_SpMV2) {
66     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
67 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
68     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
69 #endif
70     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
71   }
72 
73   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
74    * simply involves destroying the MKL sparse matrix handle and then freeing
75    * the spptr pointer. */
76   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
77 
78   if (aijmkl->sparse_optimized) {
79     sparse_status_t stat;
80     stat = mkl_sparse_destroy(aijmkl->csrA);
81     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
82   }
83 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
84   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
85 
86   /* Change the type of B to MATSEQAIJ. */
87   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
88 
89   *newmat = B;
90   PetscFunctionReturn(0);
91 }
92 
93 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
94 {
95   PetscErrorCode ierr;
96   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
97 
98   PetscFunctionBegin;
99 
100   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
101    * spptr pointer. */
102   if (aijmkl) {
103     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
104 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
105     if (aijmkl->sparse_optimized) {
106       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
107       stat = mkl_sparse_destroy(aijmkl->csrA);
108       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
109     }
110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
111     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
112   }
113 
114   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
115    * to destroy everything that remains. */
116   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
117   /* Note that I don't call MatSetType().  I believe this is because that
118    * is only to be called when *building* a matrix.  I could be wrong, but
119    * that is how things work for the SuperLU matrix class. */
120   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
125  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
126  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
127  * handle, creates a new one, and then calls mkl_sparse_optimize().
128  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
129  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
130  * an unoptimized matrix handle here. */
131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
132 {
133 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
134   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
135    * does nothing. We make it callable anyway in this case because it cuts
136    * down on littering the code with #ifdefs. */
137   PetscFunctionBegin;
138   PetscFunctionReturn(0);
139 #else
140   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
141   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
142   PetscInt         m,n;
143   MatScalar        *aa;
144   PetscInt         *aj,*ai;
145   sparse_status_t  stat;
146   PetscErrorCode   ierr;
147 
148   PetscFunctionBegin;
149 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
150   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
151    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
152    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
153    * mkl_sparse_optimize() later. */
154   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
155 #endif
156 
157   if (aijmkl->sparse_optimized) {
158     /* Matrix has been previously assembled and optimized. Must destroy old
159      * matrix handle before running the optimization step again. */
160     stat = mkl_sparse_destroy(aijmkl->csrA);
161     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
162   }
163   aijmkl->sparse_optimized = PETSC_FALSE;
164 
165   /* Now perform the SpMV2 setup and matrix optimization. */
166   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
167   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
168   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
169   m = A->rmap->n;
170   n = A->cmap->n;
171   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
172   aa   = a->a;  /* Nonzero elements stored row-by-row. */
173   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
174   if ((a->nz!=0) & !(A->structure_only)) {
175     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
176      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
177     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
178     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
179     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
180     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
181     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
182     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
183     if (!aijmkl->no_SpMV2) {
184       stat = mkl_sparse_optimize(aijmkl->csrA);
185       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
186     }
187     aijmkl->sparse_optimized = PETSC_TRUE;
188     ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
189   }
190 
191   PetscFunctionReturn(0);
192 #endif
193 }
194 
195 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
196  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
197  * matrix handle.
198  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
199  * there is no good alternative. */
200 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
201 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
202 {
203   PetscErrorCode      ierr;
204   sparse_status_t     stat;
205   sparse_index_base_t indexing;
206   PetscInt            nrows, ncols;
207   PetscInt            *aj,*ai,*dummy;
208   MatScalar           *aa;
209   Mat                 A;
210   Mat_SeqAIJMKL       *aijmkl;
211 
212   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
213   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
214   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
215 
216   if (reuse == MAT_REUSE_MATRIX) {
217     ierr = MatDestroy(mat);CHKERRQ(ierr);
218   }
219   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
220   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
221   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
222   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
223    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
224    * they will be destroyed when the MKL handle is destroyed.
225    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
226   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
227 
228   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
229    * Now turn it into a MATSEQAIJMKL. */
230   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
231 
232   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
233   aijmkl->csrA = csrA;
234 
235   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
236    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
237    * and just need to be able to run the MKL optimization step. */
238   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
239   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
240   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
241   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
242   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
243   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
244   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
245   if (!aijmkl->no_SpMV2) {
246     stat = mkl_sparse_optimize(aijmkl->csrA);
247     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
248   }
249   aijmkl->sparse_optimized = PETSC_TRUE;
250   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
251 
252   *mat = A;
253   PetscFunctionReturn(0);
254 }
255 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
256 
257 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
258  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
259  * MatMatMultNumeric(). */
260 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
261 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
262 {
263   PetscInt            i;
264   PetscInt            nrows,ncols;
265   PetscInt            nz;
266   PetscInt            *ai,*aj,*dummy;
267   PetscScalar         *aa;
268   PetscErrorCode      ierr;
269   Mat_SeqAIJMKL       *aijmkl;
270   sparse_status_t     stat;
271   sparse_index_base_t indexing;
272 
273   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
274 
275   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
276   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
277   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
278 
279   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
280    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
281   for (i=0; i<nrows; i++) {
282     nz = ai[i+1] - ai[i];
283     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
284   }
285 
286   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
287   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
288 
289   ierr = PetscObjectStateGet((PetscObject)A,&(aijmkl->state));CHKERRQ(ierr);
290   /* We mark our matrix as having a valid, optimized MKL handle.
291    * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
292   aijmkl->sparse_optimized = PETSC_TRUE;
293 
294   PetscFunctionReturn(0);
295 }
296 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
297 
298 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
299 {
300   PetscErrorCode ierr;
301   Mat_SeqAIJMKL  *aijmkl;
302   Mat_SeqAIJMKL  *aijmkl_dest;
303 
304   PetscFunctionBegin;
305   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
306   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
307   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
308   ierr = PetscArraycpy(aijmkl_dest,aijmkl,1);CHKERRQ(ierr);
309   aijmkl_dest->sparse_optimized = PETSC_FALSE;
310   if (aijmkl->eager_inspection) {
311     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
317 {
318   PetscErrorCode  ierr;
319   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
320   Mat_SeqAIJMKL   *aijmkl;
321 
322   PetscFunctionBegin;
323   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
324 
325   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
326    * extra information and some different methods, call the AssemblyEnd
327    * routine for a MATSEQAIJ.
328    * I'm not sure if this is the best way to do this, but it avoids
329    * a lot of code duplication. */
330   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
331   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
332 
333   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
334    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
335   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
336   if (aijmkl->eager_inspection) {
337     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
338   }
339 
340   PetscFunctionReturn(0);
341 }
342 
343 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
344 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
345 {
346   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
347   const PetscScalar *x;
348   PetscScalar       *y;
349   const MatScalar   *aa;
350   PetscErrorCode    ierr;
351   PetscInt          m=A->rmap->n;
352   PetscInt          n=A->cmap->n;
353   PetscScalar       alpha = 1.0;
354   PetscScalar       beta = 0.0;
355   const PetscInt    *aj,*ai;
356   char              matdescra[6];
357 
358 
359   /* Variables not in MatMult_SeqAIJ. */
360   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
361 
362   PetscFunctionBegin;
363   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
364   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
365   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
366   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
367   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
368   aa   = a->a;  /* Nonzero elements stored row-by-row. */
369   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
370 
371   /* Call MKL sparse BLAS routine to do the MatMult. */
372   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
373 
374   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
375   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
376   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 #endif
380 
381 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
382 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
383 {
384   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
385   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
386   const PetscScalar *x;
387   PetscScalar       *y;
388   PetscErrorCode    ierr;
389   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
390   PetscObjectState  state;
391 
392   PetscFunctionBegin;
393 
394   /* If there are no nonzero entries, zero yy and return immediately. */
395   if(!a->nz) {
396     PetscInt i;
397     PetscInt m=A->rmap->n;
398     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
399     for (i=0; i<m; i++) {
400       y[i] = 0.0;
401     }
402     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
403     PetscFunctionReturn(0);
404   }
405 
406   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
407   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
408 
409   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
410    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
411    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
412   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
413   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
414     MatSeqAIJMKL_create_mkl_handle(A);
415   }
416 
417   /* Call MKL SpMV2 executor routine to do the MatMult. */
418   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
419   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
420 
421   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
422   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
423   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
427 
428 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
429 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
430 {
431   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
432   const PetscScalar *x;
433   PetscScalar       *y;
434   const MatScalar   *aa;
435   PetscErrorCode    ierr;
436   PetscInt          m=A->rmap->n;
437   PetscInt          n=A->cmap->n;
438   PetscScalar       alpha = 1.0;
439   PetscScalar       beta = 0.0;
440   const PetscInt    *aj,*ai;
441   char              matdescra[6];
442 
443   /* Variables not in MatMultTranspose_SeqAIJ. */
444   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
445 
446   PetscFunctionBegin;
447   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
448   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
449   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
451   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
452   aa   = a->a;  /* Nonzero elements stored row-by-row. */
453   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
454 
455   /* Call MKL sparse BLAS routine to do the MatMult. */
456   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
457 
458   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
459   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
460   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 #endif
464 
465 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
466 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
467 {
468   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
469   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
470   const PetscScalar *x;
471   PetscScalar       *y;
472   PetscErrorCode    ierr;
473   sparse_status_t   stat;
474   PetscObjectState  state;
475 
476   PetscFunctionBegin;
477 
478   /* If there are no nonzero entries, zero yy and return immediately. */
479   if(!a->nz) {
480     PetscInt i;
481     PetscInt n=A->cmap->n;
482     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
483     for (i=0; i<n; i++) {
484       y[i] = 0.0;
485     }
486     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
487     PetscFunctionReturn(0);
488   }
489 
490   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
491   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
492 
493   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
494    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
495    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
496   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
497   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
498     MatSeqAIJMKL_create_mkl_handle(A);
499   }
500 
501   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
502   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
503   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
504 
505   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
506   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
507   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
511 
512 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
513 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
514 {
515   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
516   const PetscScalar *x;
517   PetscScalar       *y,*z;
518   const MatScalar   *aa;
519   PetscErrorCode    ierr;
520   PetscInt          m=A->rmap->n;
521   PetscInt          n=A->cmap->n;
522   const PetscInt    *aj,*ai;
523   PetscInt          i;
524 
525   /* Variables not in MatMultAdd_SeqAIJ. */
526   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
527   PetscScalar       alpha = 1.0;
528   PetscScalar       beta;
529   char              matdescra[6];
530 
531   PetscFunctionBegin;
532   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
533   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
534 
535   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
536   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
537   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
538   aa   = a->a;  /* Nonzero elements stored row-by-row. */
539   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
540 
541   /* Call MKL sparse BLAS routine to do the MatMult. */
542   if (zz == yy) {
543     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
544     beta = 1.0;
545     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
546   } else {
547     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
548      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
549     beta = 0.0;
550     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
551     for (i=0; i<m; i++) {
552       z[i] += y[i];
553     }
554   }
555 
556   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
557   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
558   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 #endif
562 
563 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
564 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
565 {
566   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
567   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
568   const PetscScalar *x;
569   PetscScalar       *y,*z;
570   PetscErrorCode    ierr;
571   PetscInt          m=A->rmap->n;
572   PetscInt          i;
573 
574   /* Variables not in MatMultAdd_SeqAIJ. */
575   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
576   PetscObjectState  state;
577 
578   PetscFunctionBegin;
579 
580   /* If there are no nonzero entries, set zz = yy and return immediately. */
581   if(!a->nz) {
582     PetscInt i;
583     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
584     for (i=0; i<m; i++) {
585       z[i] = y[i];
586     }
587     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
588     PetscFunctionReturn(0);
589   }
590 
591   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
592   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
593 
594   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
595    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
596    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
597   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
598   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
599     MatSeqAIJMKL_create_mkl_handle(A);
600   }
601 
602   /* Call MKL sparse BLAS routine to do the MatMult. */
603   if (zz == yy) {
604     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
605      * with alpha and beta both set to 1.0. */
606     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
607     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
608   } else {
609     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
610      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
611     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
612     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
613     for (i=0; i<m; i++) {
614       z[i] += y[i];
615     }
616   }
617 
618   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
619   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
620   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
624 
625 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
626 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
629   const PetscScalar *x;
630   PetscScalar       *y,*z;
631   const MatScalar   *aa;
632   PetscErrorCode    ierr;
633   PetscInt          m=A->rmap->n;
634   PetscInt          n=A->cmap->n;
635   const PetscInt    *aj,*ai;
636   PetscInt          i;
637 
638   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
639   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
640   PetscScalar       alpha = 1.0;
641   PetscScalar       beta;
642   char              matdescra[6];
643 
644   PetscFunctionBegin;
645   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
646   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
647 
648   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
649   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
650   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
651   aa   = a->a;  /* Nonzero elements stored row-by-row. */
652   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
653 
654   /* Call MKL sparse BLAS routine to do the MatMult. */
655   if (zz == yy) {
656     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
657     beta = 1.0;
658     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
659   } else {
660     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
661      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
662     beta = 0.0;
663     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
664     for (i=0; i<n; i++) {
665       z[i] += y[i];
666     }
667   }
668 
669   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
670   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
671   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 #endif
675 
676 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
677 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
678 {
679   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
680   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
681   const PetscScalar *x;
682   PetscScalar       *y,*z;
683   PetscErrorCode    ierr;
684   PetscInt          n=A->cmap->n;
685   PetscInt          i;
686   PetscObjectState  state;
687 
688   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
689   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
690 
691   PetscFunctionBegin;
692 
693   /* If there are no nonzero entries, set zz = yy and return immediately. */
694   if(!a->nz) {
695     PetscInt i;
696     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
697     for (i=0; i<n; i++) {
698       z[i] = y[i];
699     }
700     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
701     PetscFunctionReturn(0);
702   }
703 
704   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
705   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
706 
707   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
708    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
709    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
710   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
711   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
712     MatSeqAIJMKL_create_mkl_handle(A);
713   }
714 
715   /* Call MKL sparse BLAS routine to do the MatMult. */
716   if (zz == yy) {
717     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
718      * with alpha and beta both set to 1.0. */
719     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
720     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
721   } else {
722     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
723      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
724     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
725     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
726     for (i=0; i<n; i++) {
727       z[i] += y[i];
728     }
729   }
730 
731   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
732   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
733   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
737 
738 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
739 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
740  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
741  * For releases of MKL prior to version 18, update 2:
742  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
743  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
744  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
745  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
746 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
747 {
748   Mat_SeqAIJMKL    *a, *b;
749   sparse_matrix_t  csrA, csrB, csrC;
750   PetscErrorCode   ierr;
751   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
752   PetscObjectState state;
753 
754   PetscFunctionBegin;
755   a = (Mat_SeqAIJMKL*)A->spptr;
756   b = (Mat_SeqAIJMKL*)B->spptr;
757   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
758   if (!a->sparse_optimized || a->state != state) {
759     MatSeqAIJMKL_create_mkl_handle(A);
760   }
761   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
762   if (!b->sparse_optimized || b->state != state) {
763     MatSeqAIJMKL_create_mkl_handle(B);
764   }
765   csrA = a->csrA;
766   csrB = b->csrA;
767 
768   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
769   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
770 
771   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
772 
773   PetscFunctionReturn(0);
774 }
775 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
776 
777 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
778 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
779 {
780   Mat_SeqAIJMKL       *a, *b, *c;
781   sparse_matrix_t     csrA, csrB, csrC;
782   PetscErrorCode      ierr;
783   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
784   struct matrix_descr descr_type_gen;
785   PetscObjectState    state;
786 
787   PetscFunctionBegin;
788   a = (Mat_SeqAIJMKL*)A->spptr;
789   b = (Mat_SeqAIJMKL*)B->spptr;
790   c = (Mat_SeqAIJMKL*)C->spptr;
791   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
792   if (!a->sparse_optimized || a->state != state) {
793     MatSeqAIJMKL_create_mkl_handle(A);
794   }
795   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
796   if (!b->sparse_optimized || b->state != state) {
797     MatSeqAIJMKL_create_mkl_handle(B);
798   }
799   csrA = a->csrA;
800   csrB = b->csrA;
801   csrC = c->csrA;
802   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
803 
804   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
805                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
806                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
807 
808   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
809 
810   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
811   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
812 
813   PetscFunctionReturn(0);
814 }
815 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
816 
817 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
818 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
819 {
820   Mat_SeqAIJMKL    *a, *b;
821   sparse_matrix_t  csrA, csrB, csrC;
822   PetscErrorCode   ierr;
823   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
824   PetscObjectState state;
825 
826   PetscFunctionBegin;
827   a = (Mat_SeqAIJMKL*)A->spptr;
828   b = (Mat_SeqAIJMKL*)B->spptr;
829   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
830   if (!a->sparse_optimized || a->state != state) {
831     MatSeqAIJMKL_create_mkl_handle(A);
832   }
833   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
834   if (!b->sparse_optimized || b->state != state) {
835     MatSeqAIJMKL_create_mkl_handle(B);
836   }
837   csrA = a->csrA;
838   csrB = b->csrA;
839 
840   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
841   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
842 
843   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
844 
845   PetscFunctionReturn(0);
846 }
847 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
848 
849 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
850 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
851 {
852   Mat_SeqAIJMKL       *a, *p, *c;
853   sparse_matrix_t     csrA, csrP, csrC;
854   PetscBool           set, flag;
855   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
856   struct matrix_descr descr_type_sym;
857   PetscObjectState    state;
858   PetscErrorCode      ierr;
859 
860   PetscFunctionBegin;
861   ierr = MatIsSymmetricKnown(A,&set,&flag);
862   if (!set || (set && !flag)) {
863     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
864     PetscFunctionReturn(0);
865   }
866 
867   a = (Mat_SeqAIJMKL*)A->spptr;
868   p = (Mat_SeqAIJMKL*)P->spptr;
869   c = (Mat_SeqAIJMKL*)C->spptr;
870   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
871   if (!a->sparse_optimized || a->state != state) {
872     MatSeqAIJMKL_create_mkl_handle(A);
873   }
874   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
875   if (!p->sparse_optimized || p->state != state) {
876     MatSeqAIJMKL_create_mkl_handle(P);
877   }
878   csrA = a->csrA;
879   csrP = p->csrA;
880   csrC = c->csrA;
881   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
882   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
883   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
884 
885   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
886   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FINALIZE_MULT);
887   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
888 
889   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
890   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
891 
892   PetscFunctionReturn(0);
893 }
894 #endif
895 
896 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
897 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
898 {
899   Mat_SeqAIJMKL       *a, *p;
900   sparse_matrix_t     csrA, csrP, csrC;
901   PetscBool           set, flag;
902   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
903   struct matrix_descr descr_type_sym;
904   PetscObjectState    state;
905   PetscErrorCode      ierr;
906 
907   PetscFunctionBegin;
908   ierr = MatIsSymmetricKnown(A,&set,&flag);
909   if (!set || (set && !flag)) {
910     ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
911     PetscFunctionReturn(0);
912   }
913 
914   if (scall == MAT_REUSE_MATRIX) {
915     ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
916     PetscFunctionReturn(0);
917   }
918 
919   a = (Mat_SeqAIJMKL*)A->spptr;
920   p = (Mat_SeqAIJMKL*)P->spptr;
921   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
922   if (!a->sparse_optimized || a->state != state) {
923     MatSeqAIJMKL_create_mkl_handle(A);
924   }
925   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
926   if (!p->sparse_optimized || p->state != state) {
927     MatSeqAIJMKL_create_mkl_handle(P);
928   }
929   csrA = a->csrA;
930   csrP = p->csrA;
931   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
932   descr_type_sym.mode = SPARSE_FILL_MODE_LOWER;
933   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
934 
935   /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
936   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_sym,&csrC,SPARSE_STAGE_FULL_MULT);
937   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");
938 
939   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
940   ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
941 
942   PetscFunctionReturn(0);
943 }
944 #endif
945 
946 /* These function prototypes are needed in MatConvert_SeqAIJ_SeqAIJMKL(), below. */
947 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
948 #if defined(PETSC_HAVE_HYPRE)
949 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
950 #endif
951 
952 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
953  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
954  * routine, but can also be used to convert an assembled SeqAIJ matrix
955  * into a SeqAIJMKL one. */
956 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
957 {
958   PetscErrorCode ierr;
959   Mat            B = *newmat;
960   Mat_SeqAIJMKL  *aijmkl;
961   PetscBool      set;
962   PetscBool      sametype;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_INITIAL_MATRIX) {
966     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
967   }
968 
969   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
970   if (sametype) PetscFunctionReturn(0);
971 
972   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
973   B->spptr = (void*) aijmkl;
974 
975   /* Set function pointers for methods that we inherit from AIJ but override.
976    * We also parse some command line options below, since those determine some of the methods we point to. */
977   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
978   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
979   B->ops->destroy          = MatDestroy_SeqAIJMKL;
980 
981   aijmkl->sparse_optimized = PETSC_FALSE;
982 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
983   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
984 #else
985   aijmkl->no_SpMV2 = PETSC_TRUE;
986 #endif
987   aijmkl->eager_inspection = PETSC_FALSE;
988 
989   /* Parse command line options. */
990   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
991   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","Disable use of inspector-executor (SpMV 2) routines","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
992   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Run inspection at matrix assembly time, instead of waiting until needed by an operation","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
993   ierr = PetscOptionsEnd();CHKERRQ(ierr);
994 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
995   if(!aijmkl->no_SpMV2) {
996     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
997     aijmkl->no_SpMV2 = PETSC_TRUE;
998   }
999 #endif
1000 
1001 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1002   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1003   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1004   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1005   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1006   B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1007 # if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1008   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1009 #   if !defined(PETSC_USE_COMPLEX)
1010   B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
1011   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1012 #   endif
1013 # endif
1014   B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1015 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1016 
1017 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1018   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1019    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1020    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1021    * versions in which the older interface has not been deprecated, we use the old interface. */
1022   if (aijmkl->no_SpMV2) {
1023     B->ops->mult             = MatMult_SeqAIJMKL;
1024     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1025     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1026     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1027   }
1028 #endif
1029 
1030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
1032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
1033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
1034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqaijmkl_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
1035 #if defined(PETSC_HAVE_HYPRE)
1036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_seqaijmkl_seqaijmkl_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
1037 #endif
1038   if(!aijmkl->no_SpMV2) {
1039 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1040     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1041 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1042     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1043 #endif
1044     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1045 #endif
1046   }
1047 
1048   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1049   *newmat = B;
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 /*@C
1054    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1055    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1056    routines from Intel MKL whenever possible.
1057    If the installed version of MKL supports the "SpMV2" sparse
1058    inspector-executor routines, then those are used by default.
1059    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
1060    symmetric A) operations are currently supported.
1061    Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.
1062 
1063    Collective
1064 
1065    Input Parameters:
1066 +  comm - MPI communicator, set to PETSC_COMM_SELF
1067 .  m - number of rows
1068 .  n - number of columns
1069 .  nz - number of nonzeros per row (same for all rows)
1070 -  nnz - array containing the number of nonzeros in the various rows
1071          (possibly different for each row) or NULL
1072 
1073    Output Parameter:
1074 .  A - the matrix
1075 
1076    Options Database Keys:
1077 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1078 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1079 
1080    Notes:
1081    If nnz is given then nz is ignored
1082 
1083    Level: intermediate
1084 
1085 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1086 @*/
1087 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1088 {
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1093   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1094   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1095   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1100 {
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1105   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108