xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 5295adeefcdc54783dd7ea14e34a9f3d61af6b9d)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19   PetscObjectState state;
20 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
21   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
22   struct matrix_descr descr;
23 #endif
24 } Mat_SeqAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
27 
28 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
35   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
36 #endif
37 
38   PetscFunctionBegin;
39   if (reuse == MAT_INITIAL_MATRIX) {
40     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
41   }
42 
43   /* Reset the original function pointers. */
44   B->ops->duplicate        = MatDuplicate_SeqAIJ;
45   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
46   B->ops->destroy          = MatDestroy_SeqAIJ;
47   B->ops->mult             = MatMult_SeqAIJ;
48   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
49   B->ops->multadd          = MatMultAdd_SeqAIJ;
50   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
51   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
52   B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
54   B->ops->scale            = MatScale_SeqAIJ;
55   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
56   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
57   B->ops->axpy             = MatAXPY_SeqAIJ;
58 
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
62   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
63 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
64   if(!aijmkl->no_SpMV2) {
65     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
66 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
67     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
68 #endif
69     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
70   }
71 
72   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
73    * simply involves destroying the MKL sparse matrix handle and then freeing
74    * the spptr pointer. */
75   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
76 
77   if (aijmkl->sparse_optimized) {
78     sparse_status_t stat;
79     stat = mkl_sparse_destroy(aijmkl->csrA);
80     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
81   }
82 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
83   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
84 
85   /* Change the type of B to MATSEQAIJ. */
86   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
87 
88   *newmat = B;
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
93 {
94   PetscErrorCode ierr;
95   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
96 
97   PetscFunctionBegin;
98 
99   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
100    * spptr pointer. */
101   if (aijmkl) {
102     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
103 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
104     if (aijmkl->sparse_optimized) {
105       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
106       stat = mkl_sparse_destroy(aijmkl->csrA);
107       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
108     }
109 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
110     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
111   }
112 
113   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
114    * to destroy everything that remains. */
115   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
116   /* Note that I don't call MatSetType().  I believe this is because that
117    * is only to be called when *building* a matrix.  I could be wrong, but
118    * that is how things work for the SuperLU matrix class. */
119   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
124  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
125  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
126  * handle, creates a new one, and then calls mkl_sparse_optimize().
127  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
128  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
129  * an unoptimized matrix handle here. */
130 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
131 {
132 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
133   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
134    * does nothing. We make it callable anyway in this case because it cuts
135    * down on littering the code with #ifdefs. */
136   PetscFunctionBegin;
137   PetscFunctionReturn(0);
138 #else
139   Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
140   Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
141   PetscInt         m,n;
142   MatScalar        *aa;
143   PetscInt         *aj,*ai;
144   sparse_status_t  stat;
145   PetscErrorCode   ierr;
146   PetscObjectState state;
147 
148   PetscFunctionBegin;
149   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
150 
151   if (aijmkl->sparse_optimized) {
152     /* Matrix has been previously assembled and optimized. Must destroy old
153      * matrix handle before running the optimization step again. */
154     stat = mkl_sparse_destroy(aijmkl->csrA);
155     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
156   }
157   aijmkl->sparse_optimized = PETSC_FALSE;
158 
159   /* Now perform the SpMV2 setup and matrix optimization. */
160   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
161   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
162   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
163   m = A->rmap->n;
164   n = A->cmap->n;
165   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
166   aa   = a->a;  /* Nonzero elements stored row-by-row. */
167   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
168   if ((a->nz!=0) & !(A->structure_only)) {
169     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
170      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
171     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
172     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
173     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
174     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
175     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
176     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
177     stat = mkl_sparse_optimize(aijmkl->csrA);
178     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
179     aijmkl->sparse_optimized = PETSC_TRUE;
180   }
181   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
182 
183   PetscFunctionReturn(0);
184 #endif
185 }
186 
187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
188  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
189  * matrix handle.
190  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
191  * there is no good alternative. */
192 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
193 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
194 {
195   PetscErrorCode      ierr;
196   sparse_status_t     stat;
197   sparse_index_base_t indexing;
198   PetscInt            nrows, ncols;
199   PetscInt            *aj,*ai,*dummy;
200   MatScalar           *aa;
201   Mat                 A;
202   Mat_SeqAIJMKL       *aijmkl;
203 
204   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
205   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
206   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
207 
208   if (reuse == MAT_REUSE_MATRIX) {
209     ierr = MatDestroy(mat);CHKERRQ(ierr);
210   }
211   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
212   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
213   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
214   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
215    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
216    * they will be destroyed when the MKL handle is destroyed.
217    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
218   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
219 
220   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
221    * Now turn it into a MATSEQAIJMKL. */
222   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
223 
224   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
225   aijmkl->csrA = csrA;
226 
227   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
228    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
229    * and just need to be able to run the MKL optimization step. */
230   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
231   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
232   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
233   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
234   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
235   stat = mkl_sparse_optimize(aijmkl->csrA);
236   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
237   aijmkl->sparse_optimized = PETSC_TRUE;
238 
239   *mat = A;
240   PetscFunctionReturn(0);
241 }
242 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
243 
244 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
245  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
246  * MatMatMultNumeric(). */
247 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
248 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
249 {
250   PetscInt            i;
251   PetscInt            nrows,ncols;
252   PetscInt            nz;
253   PetscInt            *ai,*aj,*dummy;
254   PetscScalar         *aa;
255   PetscErrorCode      ierr;
256   Mat_SeqAIJMKL       *aijmkl;
257   sparse_status_t     stat;
258   sparse_index_base_t indexing;
259 
260   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
261 
262   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
263   stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
264   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
265 
266   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
267    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
268   for (i=0; i<nrows; i++) {
269     nz = ai[i+1] - ai[i];
270     ierr = MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);CHKERRQ(ierr);
271   }
272 
273   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
274   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
275 
276   PetscFunctionReturn(0);
277 }
278 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
279 
280 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
281 {
282   PetscErrorCode ierr;
283   Mat_SeqAIJMKL  *aijmkl;
284   Mat_SeqAIJMKL  *aijmkl_dest;
285 
286   PetscFunctionBegin;
287   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
288   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
289   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
290   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
291   aijmkl_dest->sparse_optimized = PETSC_FALSE;
292   if (aijmkl->eager_inspection) {
293     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
294   }
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
299 {
300   PetscErrorCode  ierr;
301   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
302   Mat_SeqAIJMKL   *aijmkl;
303 
304   PetscFunctionBegin;
305   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
306 
307   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
308    * extra information and some different methods, call the AssemblyEnd
309    * routine for a MATSEQAIJ.
310    * I'm not sure if this is the best way to do this, but it avoids
311    * a lot of code duplication. */
312   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
313   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
314 
315   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
316    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
317   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
318   if (aijmkl->eager_inspection) {
319     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
320 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
321   } else if (aijmkl->sparse_optimized) {
322     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
323      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
324      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
325      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
326     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
327     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
328     aijmkl->sparse_optimized = PETSC_FALSE;
329 #endif
330   }
331 
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
336 {
337   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
338   const PetscScalar *x;
339   PetscScalar       *y;
340   const MatScalar   *aa;
341   PetscErrorCode    ierr;
342   PetscInt          m=A->rmap->n;
343   PetscInt          n=A->cmap->n;
344   PetscScalar       alpha = 1.0;
345   PetscScalar       beta = 0.0;
346   const PetscInt    *aj,*ai;
347   char              matdescra[6];
348 
349 
350   /* Variables not in MatMult_SeqAIJ. */
351   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
352 
353   PetscFunctionBegin;
354   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
355   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
356   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
358   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
359   aa   = a->a;  /* Nonzero elements stored row-by-row. */
360   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
361 
362   /* Call MKL sparse BLAS routine to do the MatMult. */
363   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
364 
365   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
366   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
367   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 
371 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
372 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
373 {
374   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
375   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
376   const PetscScalar *x;
377   PetscScalar       *y;
378   PetscErrorCode    ierr;
379   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
380   PetscObjectState  state;
381 
382   PetscFunctionBegin;
383 
384   /* If there are no nonzero entries, zero yy and return immediately. */
385   if(!a->nz) {
386     PetscInt i;
387     PetscInt m=A->rmap->n;
388     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
389     for (i=0; i<m; i++) {
390       y[i] = 0.0;
391     }
392     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
393     PetscFunctionReturn(0);
394   }
395 
396   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
397   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
398 
399   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
400    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
401    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
402   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
403   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
404     MatSeqAIJMKL_create_mkl_handle(A);
405   }
406 
407   /* Call MKL SpMV2 executor routine to do the MatMult. */
408   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
409   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
410 
411   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
412   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
413   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
417 
418 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
419 {
420   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
421   const PetscScalar *x;
422   PetscScalar       *y;
423   const MatScalar   *aa;
424   PetscErrorCode    ierr;
425   PetscInt          m=A->rmap->n;
426   PetscInt          n=A->cmap->n;
427   PetscScalar       alpha = 1.0;
428   PetscScalar       beta = 0.0;
429   const PetscInt    *aj,*ai;
430   char              matdescra[6];
431 
432   /* Variables not in MatMultTranspose_SeqAIJ. */
433   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
434 
435   PetscFunctionBegin;
436   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
437   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
438   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
439   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
440   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
441   aa   = a->a;  /* Nonzero elements stored row-by-row. */
442   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
443 
444   /* Call MKL sparse BLAS routine to do the MatMult. */
445   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
446 
447   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
448   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
449   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
454 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
455 {
456   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
457   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
458   const PetscScalar *x;
459   PetscScalar       *y;
460   PetscErrorCode    ierr;
461   sparse_status_t   stat;
462   PetscObjectState  state;
463 
464   PetscFunctionBegin;
465 
466   /* If there are no nonzero entries, zero yy and return immediately. */
467   if(!a->nz) {
468     PetscInt i;
469     PetscInt n=A->cmap->n;
470     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
471     for (i=0; i<n; i++) {
472       y[i] = 0.0;
473     }
474     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
475     PetscFunctionReturn(0);
476   }
477 
478   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
479   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
480 
481   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
482    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
483    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
484   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
485   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
486     MatSeqAIJMKL_create_mkl_handle(A);
487   }
488 
489   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
490   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
491   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
492 
493   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
494   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
495   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
499 
500 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
503   const PetscScalar *x;
504   PetscScalar       *y,*z;
505   const MatScalar   *aa;
506   PetscErrorCode    ierr;
507   PetscInt          m=A->rmap->n;
508   PetscInt          n=A->cmap->n;
509   const PetscInt    *aj,*ai;
510   PetscInt          i;
511 
512   /* Variables not in MatMultAdd_SeqAIJ. */
513   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
514   PetscScalar       alpha = 1.0;
515   PetscScalar       beta;
516   char              matdescra[6];
517 
518   PetscFunctionBegin;
519   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
520   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
521 
522   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
523   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
524   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
525   aa   = a->a;  /* Nonzero elements stored row-by-row. */
526   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
527 
528   /* Call MKL sparse BLAS routine to do the MatMult. */
529   if (zz == yy) {
530     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
531     beta = 1.0;
532     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
533   } else {
534     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
535      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
536     beta = 0.0;
537     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
538     for (i=0; i<m; i++) {
539       z[i] += y[i];
540     }
541   }
542 
543   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
544   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
545   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
550 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
551 {
552   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
553   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
554   const PetscScalar *x;
555   PetscScalar       *y,*z;
556   PetscErrorCode    ierr;
557   PetscInt          m=A->rmap->n;
558   PetscInt          i;
559 
560   /* Variables not in MatMultAdd_SeqAIJ. */
561   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
562   PetscObjectState  state;
563 
564   PetscFunctionBegin;
565 
566   /* If there are no nonzero entries, set zz = yy and return immediately. */
567   if(!a->nz) {
568     PetscInt i;
569     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
570     for (i=0; i<m; i++) {
571       z[i] = y[i];
572     }
573     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
574     PetscFunctionReturn(0);
575   }
576 
577   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
578   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
579 
580   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
581    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
582    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
583   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
584   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
585     MatSeqAIJMKL_create_mkl_handle(A);
586   }
587 
588   /* Call MKL sparse BLAS routine to do the MatMult. */
589   if (zz == yy) {
590     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
591      * with alpha and beta both set to 1.0. */
592     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
593     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
594   } else {
595     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
596      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
597     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
598     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
599     for (i=0; i<m; i++) {
600       z[i] += y[i];
601     }
602   }
603 
604   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
605   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
606   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
610 
611 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
614   const PetscScalar *x;
615   PetscScalar       *y,*z;
616   const MatScalar   *aa;
617   PetscErrorCode    ierr;
618   PetscInt          m=A->rmap->n;
619   PetscInt          n=A->cmap->n;
620   const PetscInt    *aj,*ai;
621   PetscInt          i;
622 
623   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
624   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
625   PetscScalar       alpha = 1.0;
626   PetscScalar       beta;
627   char              matdescra[6];
628 
629   PetscFunctionBegin;
630   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
631   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
632 
633   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
634   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
635   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
636   aa   = a->a;  /* Nonzero elements stored row-by-row. */
637   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
638 
639   /* Call MKL sparse BLAS routine to do the MatMult. */
640   if (zz == yy) {
641     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
642     beta = 1.0;
643     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
644   } else {
645     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
646      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
647     beta = 0.0;
648     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
649     for (i=0; i<n; i++) {
650       z[i] += y[i];
651     }
652   }
653 
654   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
655   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
656   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
661 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
664   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
665   const PetscScalar *x;
666   PetscScalar       *y,*z;
667   PetscErrorCode    ierr;
668   PetscInt          n=A->cmap->n;
669   PetscInt          i;
670   PetscObjectState  state;
671 
672   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
673   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
674 
675   PetscFunctionBegin;
676 
677   /* If there are no nonzero entries, set zz = yy and return immediately. */
678   if(!a->nz) {
679     PetscInt i;
680     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
681     for (i=0; i<n; i++) {
682       z[i] = y[i];
683     }
684     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
685     PetscFunctionReturn(0);
686   }
687 
688   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
689   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
690 
691   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
692    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
693    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
694   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
695   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
696     MatSeqAIJMKL_create_mkl_handle(A);
697   }
698 
699   /* Call MKL sparse BLAS routine to do the MatMult. */
700   if (zz == yy) {
701     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
702      * with alpha and beta both set to 1.0. */
703     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
704     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
705   } else {
706     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
707      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
708     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
709     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
710     for (i=0; i<n; i++) {
711       z[i] += y[i];
712     }
713   }
714 
715   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
716   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
717   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
721 
722 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
723 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
724  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
725  * For releases of MKL prior to version 18, update 2:
726  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
727  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
728  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
729  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
730 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
731 {
732   Mat_SeqAIJMKL    *a, *b;
733   sparse_matrix_t  csrA, csrB, csrC;
734   PetscErrorCode   ierr;
735   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
736   PetscObjectState state;
737 
738   PetscFunctionBegin;
739   a = (Mat_SeqAIJMKL*)A->spptr;
740   b = (Mat_SeqAIJMKL*)B->spptr;
741   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
742   if (!a->sparse_optimized || a->state != state) {
743     MatSeqAIJMKL_create_mkl_handle(A);
744   }
745   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
746   if (!b->sparse_optimized || b->state != state) {
747     MatSeqAIJMKL_create_mkl_handle(B);
748   }
749   csrA = a->csrA;
750   csrB = b->csrA;
751 
752   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
753   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
754 
755   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
756 
757   PetscFunctionReturn(0);
758 }
759 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
760 
761 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
762 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
763 {
764   Mat_SeqAIJMKL       *a, *b, *c;
765   sparse_matrix_t     csrA, csrB, csrC;
766   PetscErrorCode      ierr;
767   sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
768   struct matrix_descr descr_type_gen;
769   PetscObjectState    state;
770 
771   PetscFunctionBegin;
772   a = (Mat_SeqAIJMKL*)A->spptr;
773   b = (Mat_SeqAIJMKL*)B->spptr;
774   c = (Mat_SeqAIJMKL*)C->spptr;
775   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
776   if (!a->sparse_optimized || a->state != state) {
777     MatSeqAIJMKL_create_mkl_handle(A);
778   }
779   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
780   if (!b->sparse_optimized || b->state != state) {
781     MatSeqAIJMKL_create_mkl_handle(B);
782   }
783   csrA = a->csrA;
784   csrB = b->csrA;
785   csrC = c->csrA;
786   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
787 
788   stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
789                          SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
790                          SPARSE_STAGE_FINALIZE_MULT,&csrC);
791 
792   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");
793 
794   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
795   ierr = MatSeqAIJMKL_update_from_mkl_handle(A);CHKERRQ(ierr);
796 
797   PetscFunctionReturn(0);
798 }
799 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
800 
801 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
802 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
803 {
804   Mat_SeqAIJMKL    *a, *b;
805   sparse_matrix_t  csrA, csrB, csrC;
806   PetscErrorCode   ierr;
807   sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
808   PetscObjectState state;
809 
810   PetscFunctionBegin;
811   a = (Mat_SeqAIJMKL*)A->spptr;
812   b = (Mat_SeqAIJMKL*)B->spptr;
813   ierr = PetscObjectStateGet((PetscObject)A,&state);CHKERRQ(ierr);
814   if (!a->sparse_optimized || a->state != state) {
815     MatSeqAIJMKL_create_mkl_handle(A);
816   }
817   ierr = PetscObjectStateGet((PetscObject)B,&state);CHKERRQ(ierr);
818   if (!b->sparse_optimized || b->state != state) {
819     MatSeqAIJMKL_create_mkl_handle(B);
820   }
821   csrA = a->csrA;
822   csrB = b->csrA;
823 
824   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
825   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
826 
827   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
828 
829   PetscFunctionReturn(0);
830 }
831 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
832 
833 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
834 {
835   PetscErrorCode ierr;
836 
837   PetscFunctionBegin;
838   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
839   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
840   PetscFunctionReturn(0);
841 }
842 
843 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
844 {
845   PetscErrorCode ierr;
846 
847   PetscFunctionBegin;
848   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
849   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
859   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
864 {
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
869   if (str == SAME_NONZERO_PATTERN) {
870     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
871     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
877  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
878  * routine, but can also be used to convert an assembled SeqAIJ matrix
879  * into a SeqAIJMKL one. */
880 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
881 {
882   PetscErrorCode ierr;
883   Mat            B = *newmat;
884   Mat_SeqAIJMKL  *aijmkl;
885   PetscBool      set;
886   PetscBool      sametype;
887 
888   PetscFunctionBegin;
889   if (reuse == MAT_INITIAL_MATRIX) {
890     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
891   }
892 
893   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
894   if (sametype) PetscFunctionReturn(0);
895 
896   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
897   B->spptr = (void*) aijmkl;
898 
899   /* Set function pointers for methods that we inherit from AIJ but override.
900    * We also parse some command line options below, since those determine some of the methods we point to. */
901   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
902   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
903   B->ops->destroy          = MatDestroy_SeqAIJMKL;
904 
905   aijmkl->sparse_optimized = PETSC_FALSE;
906 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
907   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
908 #else
909   aijmkl->no_SpMV2 = PETSC_TRUE;
910 #endif
911   aijmkl->eager_inspection = PETSC_FALSE;
912 
913   /* Parse command line options. */
914   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
915   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
916   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
917   ierr = PetscOptionsEnd();CHKERRQ(ierr);
918 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
919   if(!aijmkl->no_SpMV2) {
920     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
921     aijmkl->no_SpMV2 = PETSC_TRUE;
922   }
923 #endif
924 
925   if(!aijmkl->no_SpMV2) {
926 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
927     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
928     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
929     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
930     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
931     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
932 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
933     B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
934 #endif
935     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
936 #endif
937   } else {
938     B->ops->mult             = MatMult_SeqAIJMKL;
939     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
940     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
941     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
942   }
943 
944   B->ops->scale              = MatScale_SeqAIJMKL;
945   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
946   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
947   B->ops->axpy               = MatAXPY_SeqAIJMKL;
948 
949   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
950   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
951   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
953   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
954   if(!aijmkl->no_SpMV2) {
955 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
956     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
957 #ifdef PETSC_HAVE_MKL_SPARSE_SP2M
958     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
959 #endif
960     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
961 #endif
962   }
963 
964   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
965   *newmat = B;
966   PetscFunctionReturn(0);
967 }
968 
969 /*@C
970    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
971    This type inherits from AIJ and is largely identical, but uses sparse BLAS
972    routines from Intel MKL whenever possible.
973    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
974    operations are currently supported.
975    If the installed version of MKL supports the "SpMV2" sparse
976    inspector-executor routines, then those are used by default.
977 
978    Collective on MPI_Comm
979 
980    Input Parameters:
981 +  comm - MPI communicator, set to PETSC_COMM_SELF
982 .  m - number of rows
983 .  n - number of columns
984 .  nz - number of nonzeros per row (same for all rows)
985 -  nnz - array containing the number of nonzeros in the various rows
986          (possibly different for each row) or NULL
987 
988    Output Parameter:
989 .  A - the matrix
990 
991    Options Database Keys:
992 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
993 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
994 
995    Notes:
996    If nnz is given then nz is ignored
997 
998    Level: intermediate
999 
1000 .keywords: matrix, MKL, sparse, parallel
1001 
1002 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
1003 @*/
1004 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
1005 {
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1010   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
1011   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
1012   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1017 {
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1022   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025