xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 4f53af40d4298c4fada2e656b4dbeb55b230a855)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19   PetscObjectState state;
20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
21   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
22   struct matrix_descr descr;
23 #endif
24 } Mat_SeqAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27 
28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
35   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36 #endif
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) {
40     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
41   }
42 
43   /* Reset the original function pointers. */
44   B->ops->duplicate        = MatDuplicate_SeqAIJ;
45   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
46   B->ops->destroy          = MatDestroy_SeqAIJ;
47   B->ops->mult             = MatMult_SeqAIJ;
48   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
49   B->ops->multadd          = MatMultAdd_SeqAIJ;
50   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
51   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
54   B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
55   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
56   B->ops->scale            = MatScale_SeqAIJ;
57   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
58   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
59   B->ops->axpy             = MatAXPY_SeqAIJ;
60 
61   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
62   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
63   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
64   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
65 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
66   if(!aijmkl->no_SpMV2) {
67     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
68 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
69     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
70 #endif
71     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
72   }
73 
74   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
75    * simply involves destroying the MKL sparse matrix handle and then freeing
76    * the spptr pointer. */
77   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
78 
79   if (aijmkl->sparse_optimized) {
80     sparse_status_t stat;
81     stat = mkl_sparse_destroy(aijmkl->csrA);
82     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
83   }
84 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
85   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
86 
87   /* Change the type of B to MATSEQAIJ. */
88   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
89 
90   *newmat = B;
91   PetscFunctionReturn(0);
92 }
93 
94 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
95 {
96   PetscErrorCode ierr;
97   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
98 
99   PetscFunctionBegin;
100 
101   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
102    * spptr pointer. */
103   if (aijmkl) {
104     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
105 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
106     if (aijmkl->sparse_optimized) {
107       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
108       stat = mkl_sparse_destroy(aijmkl->csrA);
109       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
110     }
111 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
112     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
113   }
114 
115   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
116    * to destroy everything that remains. */
117   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
118   /* Note that I don't call MatSetType().  I believe this is because that
119    * is only to be called when *building* a matrix.  I could be wrong, but
120    * that is how things work for the SuperLU matrix class. */
121   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
126  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
127  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
128  * handle, creates a new one, and then calls mkl_sparse_optimize().
129  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
130  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
131  * an unoptimized matrix handle here. */
132 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
133 {
134 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
135   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
136    * does nothing. We make it callable anyway in this case because it cuts
137    * down on littering the code with #ifdefs. */
138   PetscFunctionBegin;
139   PetscFunctionReturn(0);
140 #else
141   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
142   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
143   PetscInt         m,n;
144   MatScalar        *aa;
145   PetscInt         *aj,*ai;
146   sparse_status_t  stat;
147   PetscErrorCode   ierr;
148   PetscObjectState state;
149 
150   PetscFunctionBegin;
151   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
152 
153   if (aijmkl->sparse_optimized) {
154     /* Matrix has been previously assembled and optimized. Must destroy old
155      * matrix handle before running the optimization step again. */
156     stat = mkl_sparse_destroy(aijmkl->csrA);
157     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
158   }
159   aijmkl->sparse_optimized = PETSC_FALSE;
160 
161   /* Now perform the SpMV2 setup and matrix optimization. */
162   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
163   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
164   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
165   m = A->rmap->n;
166   n = A->cmap->n;
167   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
168   aa   = a->a;  /* Nonzero elements stored row-by-row. */
169   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
170   if ((a->nz!=0) & !(A->structure_only)) {
171     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
172      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
173     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
174     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
175     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
176     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
177     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
178     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
179     stat = mkl_sparse_optimize(aijmkl->csrA);
180     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
181     aijmkl->sparse_optimized = PETSC_TRUE;
182   }
183   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
184 
185   PetscFunctionReturn(0);
186 #endif
187 }
188 
189 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
190  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
191  * matrix handle.
192  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
193  * there is no good alternative. */
194 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
195 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
196 {
197   PetscErrorCode      ierr;
198   sparse_status_t     stat;
199   sparse_index_base_t indexing;
200   PetscInt            nrows, ncols;
201   PetscInt            *aj,*ai,*dummy;
202   MatScalar           *aa;
203   Mat                 A;
204   Mat_SeqAIJMKL       *aijmkl;
205 
206   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
207   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
208   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
209 
210   if (reuse == MAT_REUSE_MATRIX) {
211     ierr = MatDestroy(mat);CHKERRQ(ierr);
212   }
213   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
214   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
215   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
216   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
217    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
218    * they will be destroyed when the MKL handle is destroyed.
219    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
220   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
221 
222   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
223    * Now turn it into a MATSEQAIJMKL. */
224   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
225 
226   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
227   aijmkl->csrA = csrA;
228 
229   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
230    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
231    * and just need to be able to run the MKL optimization step. */
232   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
233   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
234   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
235   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
236   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
237   stat = mkl_sparse_optimize(aijmkl->csrA);
238   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
239   aijmkl->sparse_optimized = PETSC_TRUE;
240 
241   *mat = A;
242   PetscFunctionReturn(0);
243 }
244 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
245 
246 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
247  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
248  * MatMatMultNumeric(). */
249 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
250 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
251 {
252   PetscInt            i;
253   PetscInt            nrows,ncols;
254   PetscInt            nz;
255   PetscInt            *ai,*aj,*dummy;
256   PetscScalar         *aa;
257   PetscErrorCode      ierr;
258   Mat_SeqAIJMKL       *aijmkl;
259   sparse_status_t     stat;
260   sparse_index_base_t indexing;
261 
262   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
263 
264   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
265   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
266   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
267 
268   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
269    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
270   for (i=0; i<nrows; i++) {
271     nz = ai[i+1] - ai[i];
272     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
273   }
274 
275   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
276   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
277 
278   PetscFunctionReturn(0);
279 }
280 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
281 
282 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
283 {
284   PetscErrorCode ierr;
285   Mat_SeqAIJMKL  *aijmkl;
286   Mat_SeqAIJMKL  *aijmkl_dest;
287 
288   PetscFunctionBegin;
289   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
290   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
291   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
292   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
293   aijmkl_dest->sparse_optimized = PETSC_FALSE;
294   if (aijmkl->eager_inspection) {
295     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
301 {
302   PetscErrorCode  ierr;
303   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
304   Mat_SeqAIJMKL   *aijmkl;
305 
306   PetscFunctionBegin;
307   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
308 
309   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
310    * extra information and some different methods, call the AssemblyEnd
311    * routine for a MATSEQAIJ.
312    * I'm not sure if this is the best way to do this, but it avoids
313    * a lot of code duplication. */
314   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
315   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
316 
317   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
318    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
319   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
320   if (aijmkl->eager_inspection) {
321     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
322   }
323 
324   PetscFunctionReturn(0);
325 }
326 
327 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
328 {
329   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
330   const PetscScalar *x;
331   PetscScalar       *y;
332   const MatScalar   *aa;
333   PetscErrorCode    ierr;
334   PetscInt          m=A->rmap->n;
335   PetscInt          n=A->cmap->n;
336   PetscScalar       alpha = 1.0;
337   PetscScalar       beta = 0.0;
338   const PetscInt    *aj,*ai;
339   char              matdescra[6];
340 
341 
342   /* Variables not in MatMult_SeqAIJ. */
343   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
344 
345   PetscFunctionBegin;
346   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
347   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
348   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
350   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
351   aa   = a->a;  /* Nonzero elements stored row-by-row. */
352   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
353 
354   /* Call MKL sparse BLAS routine to do the MatMult. */
355   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
356 
357   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
359   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
364 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
365 {
366   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
367   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
368   const PetscScalar *x;
369   PetscScalar       *y;
370   PetscErrorCode    ierr;
371   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
372   PetscObjectState  state;
373 
374   PetscFunctionBegin;
375 
376   /* If there are no nonzero entries, zero yy and return immediately. */
377   if(!a->nz) {
378     PetscInt i;
379     PetscInt m=A->rmap->n;
380     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
381     for (i=0; i<m; i++) {
382       y[i] = 0.0;
383     }
384     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
385     PetscFunctionReturn(0);
386   }
387 
388   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
389   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
390 
391   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
392    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
393    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
394   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
395   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
396     MatSeqAIJMKL_create_mkl_handle(A);
397   }
398 
399   /* Call MKL SpMV2 executor routine to do the MatMult. */
400   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
401   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
402 
403   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
404   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
409 
410 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
411 {
412   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
413   const PetscScalar *x;
414   PetscScalar       *y;
415   const MatScalar   *aa;
416   PetscErrorCode    ierr;
417   PetscInt          m=A->rmap->n;
418   PetscInt          n=A->cmap->n;
419   PetscScalar       alpha = 1.0;
420   PetscScalar       beta = 0.0;
421   const PetscInt    *aj,*ai;
422   char              matdescra[6];
423 
424   /* Variables not in MatMultTranspose_SeqAIJ. */
425   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
426 
427   PetscFunctionBegin;
428   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
429   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
430   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
431   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
432   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
433   aa   = a->a;  /* Nonzero elements stored row-by-row. */
434   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
435 
436   /* Call MKL sparse BLAS routine to do the MatMult. */
437   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
438 
439   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
440   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
441   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
442   PetscFunctionReturn(0);
443 }
444 
445 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
446 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
447 {
448   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
449   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
450   const PetscScalar *x;
451   PetscScalar       *y;
452   PetscErrorCode    ierr;
453   sparse_status_t   stat;
454   PetscObjectState  state;
455 
456   PetscFunctionBegin;
457 
458   /* If there are no nonzero entries, zero yy and return immediately. */
459   if(!a->nz) {
460     PetscInt i;
461     PetscInt n=A->cmap->n;
462     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
463     for (i=0; i<n; i++) {
464       y[i] = 0.0;
465     }
466     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
467     PetscFunctionReturn(0);
468   }
469 
470   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
472 
473   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
474    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
475    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
476   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
477   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
478     MatSeqAIJMKL_create_mkl_handle(A);
479   }
480 
481   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
482   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
483   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
484 
485   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
486   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
487   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
491 
492 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
493 {
494   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
495   const PetscScalar *x;
496   PetscScalar       *y,*z;
497   const MatScalar   *aa;
498   PetscErrorCode    ierr;
499   PetscInt          m=A->rmap->n;
500   PetscInt          n=A->cmap->n;
501   const PetscInt    *aj,*ai;
502   PetscInt          i;
503 
504   /* Variables not in MatMultAdd_SeqAIJ. */
505   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
506   PetscScalar       alpha = 1.0;
507   PetscScalar       beta;
508   char              matdescra[6];
509 
510   PetscFunctionBegin;
511   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
512   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
513 
514   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
515   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
516   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
517   aa   = a->a;  /* Nonzero elements stored row-by-row. */
518   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
519 
520   /* Call MKL sparse BLAS routine to do the MatMult. */
521   if (zz == yy) {
522     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
523     beta = 1.0;
524     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
525   } else {
526     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
527      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
528     beta = 0.0;
529     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
530     for (i=0; i<m; i++) {
531       z[i] += y[i];
532     }
533   }
534 
535   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
536   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
537   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
542 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
543 {
544   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
545   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
546   const PetscScalar *x;
547   PetscScalar       *y,*z;
548   PetscErrorCode    ierr;
549   PetscInt          m=A->rmap->n;
550   PetscInt          i;
551 
552   /* Variables not in MatMultAdd_SeqAIJ. */
553   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
554   PetscObjectState  state;
555 
556   PetscFunctionBegin;
557 
558   /* If there are no nonzero entries, set zz = yy and return immediately. */
559   if(!a->nz) {
560     PetscInt i;
561     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
562     for (i=0; i<m; i++) {
563       z[i] = y[i];
564     }
565     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
566     PetscFunctionReturn(0);
567   }
568 
569   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
570   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
571 
572   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
573    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
574    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
575   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
576   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
577     MatSeqAIJMKL_create_mkl_handle(A);
578   }
579 
580   /* Call MKL sparse BLAS routine to do the MatMult. */
581   if (zz == yy) {
582     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
583      * with alpha and beta both set to 1.0. */
584     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
585     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
586   } else {
587     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
588      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
589     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
590     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
591     for (i=0; i<m; i++) {
592       z[i] += y[i];
593     }
594   }
595 
596   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
597   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
598   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
602 
603 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
604 {
605   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
606   const PetscScalar *x;
607   PetscScalar       *y,*z;
608   const MatScalar   *aa;
609   PetscErrorCode    ierr;
610   PetscInt          m=A->rmap->n;
611   PetscInt          n=A->cmap->n;
612   const PetscInt    *aj,*ai;
613   PetscInt          i;
614 
615   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
616   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
617   PetscScalar       alpha = 1.0;
618   PetscScalar       beta;
619   char              matdescra[6];
620 
621   PetscFunctionBegin;
622   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
623   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
624 
625   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
626   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
627   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
628   aa   = a->a;  /* Nonzero elements stored row-by-row. */
629   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
630 
631   /* Call MKL sparse BLAS routine to do the MatMult. */
632   if (zz == yy) {
633     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
634     beta = 1.0;
635     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
636   } else {
637     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
638      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
639     beta = 0.0;
640     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
641     for (i=0; i<n; i++) {
642       z[i] += y[i];
643     }
644   }
645 
646   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
647   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
648   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
653 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
656   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
657   const PetscScalar *x;
658   PetscScalar       *y,*z;
659   PetscErrorCode    ierr;
660   PetscInt          n=A->cmap->n;
661   PetscInt          i;
662   PetscObjectState  state;
663 
664   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
665   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
666 
667   PetscFunctionBegin;
668 
669   /* If there are no nonzero entries, set zz = yy and return immediately. */
670   if(!a->nz) {
671     PetscInt i;
672     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
673     for (i=0; i<n; i++) {
674       z[i] = y[i];
675     }
676     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
677     PetscFunctionReturn(0);
678   }
679 
680   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
681   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
682 
683   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
684    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
685    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
686   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
687   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
688     MatSeqAIJMKL_create_mkl_handle(A);
689   }
690 
691   /* Call MKL sparse BLAS routine to do the MatMult. */
692   if (zz == yy) {
693     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
694      * with alpha and beta both set to 1.0. */
695     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
696     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
697   } else {
698     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
699      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
700     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
701     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
702     for (i=0; i<n; i++) {
703       z[i] += y[i];
704     }
705   }
706 
707   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
708   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
709   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
713 
714 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
715 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
716  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
717  * For releases of MKL prior to version 18, update 2:
718  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
719  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
720  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
721  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
722 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
723 {
724   Mat_SeqAIJMKL    *a, *b;
725   sparse_matrix_t  csrA, csrB, csrC;
726   PetscErrorCode   ierr;
727   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
728   PetscObjectState state;
729 
730   PetscFunctionBegin;
731   a = (Mat_SeqAIJMKL*)A->spptr;
732   b = (Mat_SeqAIJMKL*)B->spptr;
733   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
734   if (!a->sparse_optimized || a->state != state) {
735     MatSeqAIJMKL_create_mkl_handle(A);
736   }
737   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
738   if (!b->sparse_optimized || b->state != state) {
739     MatSeqAIJMKL_create_mkl_handle(B);
740   }
741   csrA = a->csrA;
742   csrB = b->csrA;
743 
744   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
745   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
746 
747   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
748 
749   PetscFunctionReturn(0);
750 }
751 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
752 
753 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
754 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
755 {
756   Mat_SeqAIJMKL       *a, *b, *c;
757   sparse_matrix_t     csrA, csrB, csrC;
758   PetscErrorCode      ierr;
759   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
760   struct matrix_descr descr_type_gen;
761   PetscObjectState    state;
762 
763   PetscFunctionBegin;
764   a = (Mat_SeqAIJMKL*)A->spptr;
765   b = (Mat_SeqAIJMKL*)B->spptr;
766   c = (Mat_SeqAIJMKL*)C->spptr;
767   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
768   if (!a->sparse_optimized || a->state != state) {
769     MatSeqAIJMKL_create_mkl_handle(A);
770   }
771   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
772   if (!b->sparse_optimized || b->state != state) {
773     MatSeqAIJMKL_create_mkl_handle(B);
774   }
775   csrA = a->csrA;
776   csrB = b->csrA;
777   csrC = c->csrA;
778   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
779 
780   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
781                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
782                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
783 
784   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
785 
786   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
787   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
788 
789   PetscFunctionReturn(0);
790 }
791 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
792 
793 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
794 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
795 {
796   Mat_SeqAIJMKL    *a, *b;
797   sparse_matrix_t  csrA, csrB, csrC;
798   PetscErrorCode   ierr;
799   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
800   PetscObjectState state;
801 
802   PetscFunctionBegin;
803   a = (Mat_SeqAIJMKL*)A->spptr;
804   b = (Mat_SeqAIJMKL*)B->spptr;
805   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
806   if (!a->sparse_optimized || a->state != state) {
807     MatSeqAIJMKL_create_mkl_handle(A);
808   }
809   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
810   if (!b->sparse_optimized || b->state != state) {
811     MatSeqAIJMKL_create_mkl_handle(B);
812   }
813   csrA = a->csrA;
814   csrB = b->csrA;
815 
816   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
817   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
818 
819   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
820 
821   PetscFunctionReturn(0);
822 }
823 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
824 
825 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
826 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
827 {
828   Mat_SeqAIJMKL       *a, *p, *c;
829   sparse_matrix_t     csrA, csrP, csrC;
830   PetscBool           set, flag;
831   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
832   struct matrix_descr descr_type_gen;
833   PetscObjectState    state;
834   PetscErrorCode      ierr;
835 
836   PetscFunctionBegin;
837   ierr = MatIsSymmetricKnown(A,&set,&flag);
838   if (!set || (set && !flag)) {
839     ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);CHKERRQ(ierr);
840     PetscFunctionReturn(0);
841   }
842 
843   a = (Mat_SeqAIJMKL*)A->spptr;
844   p = (Mat_SeqAIJMKL*)P->spptr;
845   c = (Mat_SeqAIJMKL*)C->spptr;
846   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
847   if (!a->sparse_optimized || a->state != state) {
848     MatSeqAIJMKL_create_mkl_handle(A);
849   }
850   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
851   if (!p->sparse_optimized || p->state != state) {
852     MatSeqAIJMKL_create_mkl_handle(P);
853   }
854   csrA = a->csrA;
855   csrP = p->csrA;
856   csrC = c->csrA;
857   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
858 
859   /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */
860   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT);
861   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");
862 
863   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
864   ierr = MatSeqAIJMKL_update_from_mkl_handle(C);CHKERRQ(ierr);
865 
866   PetscFunctionReturn(0);
867 }
868 #endif
869 
870 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
871 PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
872 {
873   Mat_SeqAIJMKL       *a, *p;
874   sparse_matrix_t     csrA, csrP, csrC;
875   PetscBool           set, flag;
876   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
877   struct matrix_descr descr_type_gen;
878   PetscObjectState    state;
879   PetscErrorCode      ierr;
880 
881   PetscFunctionBegin;
882   ierr = MatIsSymmetricKnown(A,&set,&flag);
883   if (!set || (set && !flag)) {
884     ierr = MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);CHKERRQ(ierr);
885     PetscFunctionReturn(0);
886   }
887 
888   if (scall == MAT_REUSE_MATRIX) {
889     ierr = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);CHKERRQ(ierr);
890     PetscFunctionReturn(0);
891   }
892 
893   a = (Mat_SeqAIJMKL*)A->spptr;
894   p = (Mat_SeqAIJMKL*)P->spptr;
895   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
896   if (!a->sparse_optimized || a->state != state) {
897     MatSeqAIJMKL_create_mkl_handle(A);
898   }
899   ierr = PetscObjectStateGet((PetscObject)P,&state);CHKERRQ(ierr);
900   if (!p->sparse_optimized || p->state != state) {
901     MatSeqAIJMKL_create_mkl_handle(P);
902   }
903   csrA = a->csrA;
904   csrP = p->csrA;
905   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
906 
907   /* TODO: Below won't work for complex matrix. Protect this! Maybe where function pointers are assigned in MatConvert? */
908   stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT);
909   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");
910 
911   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
912   ierr = MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
913 
914   PetscFunctionReturn(0);
915 }
916 #endif
917 
918 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
919 {
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
924   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
929 {
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
934   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
938 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
939 {
940   PetscErrorCode ierr;
941 
942   PetscFunctionBegin;
943   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
944   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
948 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
949 {
950   PetscErrorCode ierr;
951 
952   PetscFunctionBegin;
953   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
954   if (str == SAME_NONZERO_PATTERN) {
955     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
956     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
957   }
958   PetscFunctionReturn(0);
959 }
960 
961 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
962  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
963  * routine, but can also be used to convert an assembled SeqAIJ matrix
964  * into a SeqAIJMKL one. */
965 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
966 {
967   PetscErrorCode ierr;
968   Mat            B = *newmat;
969   Mat_SeqAIJMKL  *aijmkl;
970   PetscBool      set;
971   PetscBool      sametype;
972 
973   PetscFunctionBegin;
974   if (reuse == MAT_INITIAL_MATRIX) {
975     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
976   }
977 
978   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
979   if (sametype) PetscFunctionReturn(0);
980 
981   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
982   B->spptr = (void*) aijmkl;
983 
984   /* Set function pointers for methods that we inherit from AIJ but override.
985    * We also parse some command line options below, since those determine some of the methods we point to. */
986   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
987   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
988   B->ops->destroy          = MatDestroy_SeqAIJMKL;
989 
990   aijmkl->sparse_optimized = PETSC_FALSE;
991 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
992   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
993 #else
994   aijmkl->no_SpMV2 = PETSC_TRUE;
995 #endif
996   aijmkl->eager_inspection = PETSC_FALSE;
997 
998   /* Parse command line options. */
999   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
1000   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
1001   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
1002   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1003 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1004   if(!aijmkl->no_SpMV2) {
1005     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
1006     aijmkl->no_SpMV2 = PETSC_TRUE;
1007   }
1008 #endif
1009 
1010   if(!aijmkl->no_SpMV2) {
1011 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1012     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1013     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1014     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1015     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1016     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1017 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
1018     B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1019 #ifndef PETSC_USE_COMPLEX
1020     B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
1021     B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
1022 #endif
1023 #endif
1024     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
1025 #endif
1026   } else {
1027     B->ops->mult             = MatMult_SeqAIJMKL;
1028     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1029     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1030     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1031   }
1032 
1033   B->ops->scale              = MatScale_SeqAIJMKL;
1034   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
1035   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
1036   B->ops->axpy               = MatAXPY_SeqAIJMKL;
1037 
1038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
1039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
1040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
1041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
1042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
1043   if(!aijmkl->no_SpMV2) {
1044 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
1045     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1046 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
1047     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1048 #endif
1049     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
1050 #endif
1051   }
1052 
1053   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
1054   *newmat = B;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 /*@C
1059    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
1060    This type inherits from AIJ and is largely identical, but uses sparse BLAS
1061    routines from Intel MKL whenever possible.
1062    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
1063    operations are currently supported.
1064    If the installed version of MKL supports the "SpMV2" sparse
1065    inspector-executor routines, then those are used by default.
1066 
1067    Collective on MPI_Comm
1068 
1069    Input Parameters:
1070 +  comm - MPI communicator, set to PETSC_COMM_SELF
1071 .  m - number of rows
1072 .  n - number of columns
1073 .  nz - number of nonzeros per row (same for all rows)
1074 -  nnz - array containing the number of nonzeros in the various rows
1075          (possibly different for each row) or NULL
1076 
1077    Output Parameter:
1078 .  A - the matrix
1079 
1080    Options Database Keys:
1081 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1082 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
1083 
1084    Notes:
1085    If nnz is given then nz is ignored
1086 
1087    Level: intermediate
1088 
1089 .keywords: matrix, MKL, sparse, parallel
1090 
1091 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1092 @*/
1093 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1094 {
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1099   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1100   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1101   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1106 {
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1111   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114