xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision e8be1fc79ed5d3947947e3bba595c90a60c3c93e)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19   PetscObjectState state;
20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
21   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
22   struct matrix_descr descr;
23 #endif
24 } Mat_SeqAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27 
28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
35   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36 #endif
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) {
40     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
41   }
42 
43   /* Reset the original function pointers. */
44   B->ops->duplicate        = MatDuplicate_SeqAIJ;
45   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
46   B->ops->destroy          = MatDestroy_SeqAIJ;
47   B->ops->mult             = MatMult_SeqAIJ;
48   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
49   B->ops->multadd          = MatMultAdd_SeqAIJ;
50   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
51   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
54   B->ops->scale            = MatScale_SeqAIJ;
55   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
56   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
57   B->ops->axpy             = MatAXPY_SeqAIJ;
58 
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
62   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
64   if(!aijmkl->no_SpMV2) {
65     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
66 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
67     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
68 #endif
69     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
70   }
71 
72   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
73    * simply involves destroying the MKL sparse matrix handle and then freeing
74    * the spptr pointer. */
75   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
76 
77   if (aijmkl->sparse_optimized) {
78     sparse_status_t stat;
79     stat = mkl_sparse_destroy(aijmkl->csrA);
80     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
81   }
82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
83   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
84 
85   /* Change the type of B to MATSEQAIJ. */
86   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
87 
88   *newmat = B;
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
93 {
94   PetscErrorCode ierr;
95   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
96 
97   PetscFunctionBegin;
98 
99   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
100    * spptr pointer. */
101   if (aijmkl) {
102     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
103 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
104     if (aijmkl->sparse_optimized) {
105       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
106       stat = mkl_sparse_destroy(aijmkl->csrA);
107       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
108     }
109 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
110     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
111   }
112 
113   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
114    * to destroy everything that remains. */
115   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
116   /* Note that I don't call MatSetType().  I believe this is because that
117    * is only to be called when *building* a matrix.  I could be wrong, but
118    * that is how things work for the SuperLU matrix class. */
119   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
124  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
125  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
126  * handle, creates a new one, and then calls mkl_sparse_optimize().
127  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
128  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
129  * an unoptimized matrix handle here. */
130 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
131 {
132 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
133   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
134    * does nothing. We make it callable anyway in this case because it cuts
135    * down on littering the code with #ifdefs. */
136   PetscFunctionBegin;
137   PetscFunctionReturn(0);
138 #else
139   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
140   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
141   PetscInt         m,n;
142   MatScalar        *aa;
143   PetscInt         *aj,*ai;
144   sparse_status_t  stat;
145   PetscErrorCode   ierr;
146   PetscObjectState state;
147 
148   PetscFunctionBegin;
149   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
150 
151   if (aijmkl->sparse_optimized) {
152     /* Matrix has been previously assembled and optimized. Must destroy old
153      * matrix handle before running the optimization step again. */
154     stat = mkl_sparse_destroy(aijmkl->csrA);
155     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
156   }
157   aijmkl->sparse_optimized = PETSC_FALSE;
158 
159   /* Now perform the SpMV2 setup and matrix optimization. */
160   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
161   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
162   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
163   m = A->rmap->n;
164   n = A->cmap->n;
165   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
166   aa   = a->a;  /* Nonzero elements stored row-by-row. */
167   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
168   if ((a->nz!=0) & !(A->structure_only)) {
169     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
170      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
171     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
172     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
173     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
174     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
175     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
176     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
177     stat = mkl_sparse_optimize(aijmkl->csrA);
178     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
179     aijmkl->sparse_optimized = PETSC_TRUE;
180   }
181   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
182 
183   PetscFunctionReturn(0);
184 #endif
185 }
186 
187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
188  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
189  * matrix handle.
190  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
191  * there is no good alternative. */
192 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
193 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
194 {
195   PetscErrorCode      ierr;
196   sparse_status_t     stat;
197   sparse_index_base_t indexing;
198   PetscInt            nrows, ncols;
199   PetscInt            *aj,*ai,*dummy;
200   MatScalar           *aa;
201   Mat                 A;
202   Mat_SeqAIJMKL       *aijmkl;
203 
204   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
205   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
206   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
207 
208   if (reuse == MAT_REUSE_MATRIX) {
209     ierr = MatDestroy(mat);CHKERRQ(ierr);
210   }
211   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
212   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
213   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216    * they will be destroyed when the MKL handle is destroyed.
217    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
219 
220   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
221    * Now turn it into a MATSEQAIJMKL. */
222   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
223 
224   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
225   aijmkl->csrA = csrA;
226 
227   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
228    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
229    * and just need to be able to run the MKL optimization step. */
230   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
231   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
232   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
233   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
234   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
235   stat = mkl_sparse_optimize(aijmkl->csrA);
236   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
237   aijmkl->sparse_optimized = PETSC_TRUE;
238 
239   *mat = A;
240   PetscFunctionReturn(0);
241 }
242 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
243 
244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
245  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
246  * MatMatMultNumeric(). */
247 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
248 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
249 {
250   PetscInt            i;
251   PetscInt            nrows,ncols;
252   PetscInt            nz;
253   PetscInt            *ai,*aj,*dummy;
254   PetscScalar         *aa;
255   PetscErrorCode      ierr;
256   Mat_SeqAIJMKL       *aijmkl;
257   sparse_status_t     stat;
258   sparse_index_base_t indexing;
259 
260   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
261 
262   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
263   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
264   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
265 
266   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
267    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
268   for (i=0; i<nrows; i++) {
269     nz = ai[i+1] - ai[i];
270     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
271   }
272 
273   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
275 
276   PetscFunctionReturn(0);
277 }
278 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
279 
280 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
281 {
282   PetscErrorCode ierr;
283   Mat_SeqAIJMKL  *aijmkl;
284   Mat_SeqAIJMKL  *aijmkl_dest;
285 
286   PetscFunctionBegin;
287   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
288   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
289   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
290   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
291   aijmkl_dest->sparse_optimized = PETSC_FALSE;
292   if (aijmkl->eager_inspection) {
293     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
294   }
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
299 {
300   PetscErrorCode  ierr;
301   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
302   Mat_SeqAIJMKL   *aijmkl;
303 
304   PetscFunctionBegin;
305   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
306 
307   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
308    * extra information and some different methods, call the AssemblyEnd
309    * routine for a MATSEQAIJ.
310    * I'm not sure if this is the best way to do this, but it avoids
311    * a lot of code duplication. */
312   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
313   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
314 
315   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
316    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
317   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
318   if (aijmkl->eager_inspection) {
319     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
320 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
321   } else if (aijmkl->sparse_optimized) {
322     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
323      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
324      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
325      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
326     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
327     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
328     aijmkl->sparse_optimized = PETSC_FALSE;
329 #endif
330   }
331 
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
336 {
337   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
338   const PetscScalar *x;
339   PetscScalar       *y;
340   const MatScalar   *aa;
341   PetscErrorCode    ierr;
342   PetscInt          m=A->rmap->n;
343   PetscInt          n=A->cmap->n;
344   PetscScalar       alpha = 1.0;
345   PetscScalar       beta = 0.0;
346   const PetscInt    *aj,*ai;
347   char              matdescra[6];
348 
349 
350   /* Variables not in MatMult_SeqAIJ. */
351   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
352 
353   PetscFunctionBegin;
354   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
355   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
356   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
358   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
359   aa   = a->a;  /* Nonzero elements stored row-by-row. */
360   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
361 
362   /* Call MKL sparse BLAS routine to do the MatMult. */
363   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
364 
365   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
366   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
367   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 
371 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
372 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
373 {
374   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
375   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
376   const PetscScalar *x;
377   PetscScalar       *y;
378   PetscErrorCode    ierr;
379   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
380   PetscObjectState  state;
381 
382   PetscFunctionBegin;
383 
384   /* If there are no nonzero entries, zero yy and return immediately. */
385   if(!a->nz) {
386     PetscInt i;
387     PetscInt m=A->rmap->n;
388     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
389     for (i=0; i<m; i++) {
390       y[i] = 0.0;
391     }
392     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
393     PetscFunctionReturn(0);
394   }
395 
396   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
397   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398 
399   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
400    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
401    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
402   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
403   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
404     MatSeqAIJMKL_create_mkl_handle(A);
405   }
406 
407   /* Call MKL SpMV2 executor routine to do the MatMult. */
408   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
409   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
410 
411   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
412   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
417 
418 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
419 {
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
421   const PetscScalar *x;
422   PetscScalar       *y;
423   const MatScalar   *aa;
424   PetscErrorCode    ierr;
425   PetscInt          m=A->rmap->n;
426   PetscInt          n=A->cmap->n;
427   PetscScalar       alpha = 1.0;
428   PetscScalar       beta = 0.0;
429   const PetscInt    *aj,*ai;
430   char              matdescra[6];
431 
432   /* Variables not in MatMultTranspose_SeqAIJ. */
433   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
434 
435   PetscFunctionBegin;
436   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
437   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
438   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
439   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
440   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
441   aa   = a->a;  /* Nonzero elements stored row-by-row. */
442   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
443 
444   /* Call MKL sparse BLAS routine to do the MatMult. */
445   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
446 
447   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
448   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
449   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
454 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
455 {
456   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
457   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
458   const PetscScalar *x;
459   PetscScalar       *y;
460   PetscErrorCode    ierr;
461   sparse_status_t   stat;
462   PetscObjectState  state;
463 
464   PetscFunctionBegin;
465 
466   /* If there are no nonzero entries, zero yy and return immediately. */
467   if(!a->nz) {
468     PetscInt i;
469     PetscInt n=A->cmap->n;
470     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
471     for (i=0; i<n; i++) {
472       y[i] = 0.0;
473     }
474     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
475     PetscFunctionReturn(0);
476   }
477 
478   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
479   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480 
481   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
482    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
483    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
484   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
485   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
486     MatSeqAIJMKL_create_mkl_handle(A);
487   }
488 
489   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
490   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
491   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
492 
493   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
494   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
499 
500 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
503   const PetscScalar *x;
504   PetscScalar       *y,*z;
505   const MatScalar   *aa;
506   PetscErrorCode    ierr;
507   PetscInt          m=A->rmap->n;
508   PetscInt          n=A->cmap->n;
509   const PetscInt    *aj,*ai;
510   PetscInt          i;
511 
512   /* Variables not in MatMultAdd_SeqAIJ. */
513   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
514   PetscScalar       alpha = 1.0;
515   PetscScalar       beta;
516   char              matdescra[6];
517 
518   PetscFunctionBegin;
519   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
520   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
521 
522   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
524   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
525   aa   = a->a;  /* Nonzero elements stored row-by-row. */
526   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
527 
528   /* Call MKL sparse BLAS routine to do the MatMult. */
529   if (zz == yy) {
530     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
531     beta = 1.0;
532     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
533   } else {
534     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
535      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
536     beta = 0.0;
537     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
538     for (i=0; i<m; i++) {
539       z[i] += y[i];
540     }
541   }
542 
543   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
544   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
545   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
550 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
551 {
552   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
553   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
554   const PetscScalar *x;
555   PetscScalar       *y,*z;
556   PetscErrorCode    ierr;
557   PetscInt          m=A->rmap->n;
558   PetscInt          i;
559 
560   /* Variables not in MatMultAdd_SeqAIJ. */
561   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
562   PetscObjectState  state;
563 
564   PetscFunctionBegin;
565 
566   /* If there are no nonzero entries, set zz = yy and return immediately. */
567   if(!a->nz) {
568     PetscInt i;
569     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
570     for (i=0; i<m; i++) {
571       z[i] = y[i];
572     }
573     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
574     PetscFunctionReturn(0);
575   }
576 
577   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
578   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
579 
580   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
581    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
582    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
583   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
584   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
585     MatSeqAIJMKL_create_mkl_handle(A);
586   }
587 
588   /* Call MKL sparse BLAS routine to do the MatMult. */
589   if (zz == yy) {
590     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
591      * with alpha and beta both set to 1.0. */
592     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
593     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
594   } else {
595     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
596      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
597     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
598     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
599     for (i=0; i<m; i++) {
600       z[i] += y[i];
601     }
602   }
603 
604   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
605   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
606   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
610 
611 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
614   const PetscScalar *x;
615   PetscScalar       *y,*z;
616   const MatScalar   *aa;
617   PetscErrorCode    ierr;
618   PetscInt          m=A->rmap->n;
619   PetscInt          n=A->cmap->n;
620   const PetscInt    *aj,*ai;
621   PetscInt          i;
622 
623   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
624   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
625   PetscScalar       alpha = 1.0;
626   PetscScalar       beta;
627   char              matdescra[6];
628 
629   PetscFunctionBegin;
630   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
631   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
632 
633   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
634   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
635   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
636   aa   = a->a;  /* Nonzero elements stored row-by-row. */
637   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
638 
639   /* Call MKL sparse BLAS routine to do the MatMult. */
640   if (zz == yy) {
641     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
642     beta = 1.0;
643     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
644   } else {
645     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
646      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
647     beta = 0.0;
648     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
649     for (i=0; i<n; i++) {
650       z[i] += y[i];
651     }
652   }
653 
654   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
655   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
656   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
665   const PetscScalar *x;
666   PetscScalar       *y,*z;
667   PetscErrorCode    ierr;
668   PetscInt          n=A->cmap->n;
669   PetscInt          i;
670   PetscObjectState  state;
671 
672   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
673   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
674 
675   PetscFunctionBegin;
676 
677   /* If there are no nonzero entries, set zz = yy and return immediately. */
678   if(!a->nz) {
679     PetscInt i;
680     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
681     for (i=0; i<n; i++) {
682       z[i] = y[i];
683     }
684     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
685     PetscFunctionReturn(0);
686   }
687 
688   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
689   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
690 
691   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
692    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
693    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
694   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
695   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
696     MatSeqAIJMKL_create_mkl_handle(A);
697   }
698 
699   /* Call MKL sparse BLAS routine to do the MatMult. */
700   if (zz == yy) {
701     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
702      * with alpha and beta both set to 1.0. */
703     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
704     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
705   } else {
706     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
707      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
708     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
709     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
710     for (i=0; i<n; i++) {
711       z[i] += y[i];
712     }
713   }
714 
715   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
716   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
717   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
721 
722 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
723 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
724  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
725  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
726  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
727  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
728  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
729 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
730 {
731   Mat_SeqAIJMKL    *a, *b;
732   sparse_matrix_t  csrA, csrB, csrC;
733   PetscErrorCode   ierr;
734   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
735   PetscObjectState state;
736 
737   PetscFunctionBegin;
738   a = (Mat_SeqAIJMKL*)A->spptr;
739   b = (Mat_SeqAIJMKL*)B->spptr;
740   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
741   if (!a->sparse_optimized || a->state != state) {
742     MatSeqAIJMKL_create_mkl_handle(A);
743   }
744   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
745   if (!b->sparse_optimized || b->state != state) {
746     MatSeqAIJMKL_create_mkl_handle(B);
747   }
748   csrA = a->csrA;
749   csrB = b->csrA;
750 
751   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
752   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
753 
754   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
755 
756   PetscFunctionReturn(0);
757 }
758 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
759 
760 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
761 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
762 {
763   Mat_SeqAIJMKL       *a, *b, *c;
764   sparse_matrix_t     csrA, csrB, csrC;
765   PetscErrorCode      ierr;
766   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
767   struct matrix_descr descr_type_gen;
768   PetscObjectState    state;
769 
770   PetscFunctionBegin;
771   a = (Mat_SeqAIJMKL*)A->spptr;
772   b = (Mat_SeqAIJMKL*)B->spptr;
773   c = (Mat_SeqAIJMKL*)C->spptr;
774   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
775   if (!a->sparse_optimized || a->state != state) {
776     MatSeqAIJMKL_create_mkl_handle(A);
777   }
778   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
779   if (!b->sparse_optimized || b->state != state) {
780     MatSeqAIJMKL_create_mkl_handle(B);
781   }
782   csrA = a->csrA;
783   csrB = b->csrA;
784   csrC = c->csrA;
785   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
786 
787   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
788                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
789                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
790 
791   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
792 
793   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
794   ierr = MatSeqAIJMKL_update_from_mkl_handle(A);CHKERRQ(ierr);
795 
796   PetscFunctionReturn(0);
797 }
798 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
799 
800 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
801 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
802 {
803   Mat_SeqAIJMKL    *a, *b;
804   sparse_matrix_t  csrA, csrB, csrC;
805   PetscErrorCode   ierr;
806   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
807   PetscObjectState state;
808 
809   PetscFunctionBegin;
810   a = (Mat_SeqAIJMKL*)A->spptr;
811   b = (Mat_SeqAIJMKL*)B->spptr;
812   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
813   if (!a->sparse_optimized || a->state != state) {
814     MatSeqAIJMKL_create_mkl_handle(A);
815   }
816   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
817   if (!b->sparse_optimized || b->state != state) {
818     MatSeqAIJMKL_create_mkl_handle(B);
819   }
820   csrA = a->csrA;
821   csrB = b->csrA;
822 
823   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
824   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
825 
826   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
827 
828   PetscFunctionReturn(0);
829 }
830 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
831 
832 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
833 {
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
838   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
843 {
844   PetscErrorCode ierr;
845 
846   PetscFunctionBegin;
847   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
848   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
853 {
854   PetscErrorCode ierr;
855 
856   PetscFunctionBegin;
857   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
858   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
863 {
864   PetscErrorCode ierr;
865 
866   PetscFunctionBegin;
867   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
868   if (str == SAME_NONZERO_PATTERN) {
869     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
870     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
876  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
877  * routine, but can also be used to convert an assembled SeqAIJ matrix
878  * into a SeqAIJMKL one. */
879 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
880 {
881   PetscErrorCode ierr;
882   Mat            B = *newmat;
883   Mat_SeqAIJMKL  *aijmkl;
884   PetscBool      set;
885   PetscBool      sametype;
886 
887   PetscFunctionBegin;
888   if (reuse == MAT_INITIAL_MATRIX) {
889     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
890   }
891 
892   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
893   if (sametype) PetscFunctionReturn(0);
894 
895   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
896   B->spptr = (void*) aijmkl;
897 
898   /* Set function pointers for methods that we inherit from AIJ but override.
899    * We also parse some command line options below, since those determine some of the methods we point to. */
900   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
901   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
902   B->ops->destroy          = MatDestroy_SeqAIJMKL;
903 
904   aijmkl->sparse_optimized = PETSC_FALSE;
905 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
906   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
907 #else
908   aijmkl->no_SpMV2 = PETSC_TRUE;
909 #endif
910   aijmkl->eager_inspection = PETSC_FALSE;
911 
912   /* Parse command line options. */
913   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
914   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
915   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
916   ierr = PetscOptionsEnd();CHKERRQ(ierr);
917 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
918   if(!aijmkl->no_SpMV2) {
919     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
920     aijmkl->no_SpMV2 = PETSC_TRUE;
921   }
922 #endif
923 
924   if(!aijmkl->no_SpMV2) {
925 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
926     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
927     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
928     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
929     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
930     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
931 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
932     B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
933 #endif
934     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
935 #endif
936   } else {
937     B->ops->mult             = MatMult_SeqAIJMKL;
938     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
939     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
940     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
941   }
942 
943   B->ops->scale              = MatScale_SeqAIJMKL;
944   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
945   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
946   B->ops->axpy               = MatAXPY_SeqAIJMKL;
947 
948   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
949   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
950   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
951   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
953   if(!aijmkl->no_SpMV2) {
954 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
955     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
956 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
957     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
958 #endif
959     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
960 #endif
961   }
962 
963   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
964   *newmat = B;
965   PetscFunctionReturn(0);
966 }
967 
968 /*@C
969    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
970    This type inherits from AIJ and is largely identical, but uses sparse BLAS
971    routines from Intel MKL whenever possible.
972    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
973    operations are currently supported.
974    If the installed version of MKL supports the "SpMV2" sparse
975    inspector-executor routines, then those are used by default.
976 
977    Collective on MPI_Comm
978 
979    Input Parameters:
980 +  comm - MPI communicator, set to PETSC_COMM_SELF
981 .  m - number of rows
982 .  n - number of columns
983 .  nz - number of nonzeros per row (same for all rows)
984 -  nnz - array containing the number of nonzeros in the various rows
985          (possibly different for each row) or NULL
986 
987    Output Parameter:
988 .  A - the matrix
989 
990    Options Database Keys:
991 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
992 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
993 
994    Notes:
995    If nnz is given then nz is ignored
996 
997    Level: intermediate
998 
999 .keywords: matrix, MKL, sparse, parallel
1000 
1001 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1002 @*/
1003 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1009   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1010   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1011   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1016 {
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1021   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024