xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 761b7befd64a3f32db0fedb577296bc4a5767613)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
52   B->ops->scale            = MatScale_SeqAIJ;
53   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
54   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
55   B->ops->axpy             = MatAXPY_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
62   if(!aijmkl->no_SpMV2) {
63     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
65   }
66 
67   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
68    * simply involves destroying the MKL sparse matrix handle and then freeing
69    * the spptr pointer. */
70   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
71 
72   if (aijmkl->sparse_optimized) {
73     sparse_status_t stat;
74     stat = mkl_sparse_destroy(aijmkl->csrA);
75     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
76   }
77 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
78   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
79 
80   /* Change the type of B to MATSEQAIJ. */
81   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
82 
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
88 {
89   PetscErrorCode ierr;
90   Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
91 
92   PetscFunctionBegin;
93 
94   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
95    * spptr pointer. */
96   if (aijmkl) {
97     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
98 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
99     if (aijmkl->sparse_optimized) {
100       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
101       stat = mkl_sparse_destroy(aijmkl->csrA);
102       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
103     }
104 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
105     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
106   }
107 
108   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
109    * to destroy everything that remains. */
110   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
111   /* Note that I don't call MatSetType().  I believe this is because that
112    * is only to be called when *building* a matrix.  I could be wrong, but
113    * that is how things work for the SuperLU matrix class. */
114   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
115   PetscFunctionReturn(0);
116 }
117 
118 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
119  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
120  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
121  * handle, creates a new one, and then calls mkl_sparse_optimize().
122  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
123  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
124  * an unoptimized matrix handle here. */
125 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
126 {
127 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
128   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
129    * does nothing. We make it callable anyway in this case because it cuts
130    * down on littering the code with #ifdefs. */
131   PetscFunctionBegin;
132   PetscFunctionReturn(0);
133 #else
134   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
135   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
136   PetscInt        m,n;
137   MatScalar       *aa;
138   PetscInt        *aj,*ai;
139   sparse_status_t stat;
140 
141   PetscFunctionBegin;
142   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
143 
144   if (aijmkl->sparse_optimized) {
145     /* Matrix has been previously assembled and optimized. Must destroy old
146      * matrix handle before running the optimization step again. */
147     stat = mkl_sparse_destroy(aijmkl->csrA);
148     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
149   }
150   aijmkl->sparse_optimized = PETSC_FALSE;
151 
152   /* Now perform the SpMV2 setup and matrix optimization. */
153   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
154   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
155   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
156   m = A->rmap->n;
157   n = A->cmap->n;
158   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
159   aa   = a->a;  /* Nonzero elements stored row-by-row. */
160   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
161   if ((a->nz!=0) & !(A->structure_only)) {
162     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
163      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
164     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
165     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
166     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
167     stat = mkl_sparse_optimize(aijmkl->csrA);
168     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
169     aijmkl->sparse_optimized = PETSC_TRUE;
170   }
171 
172   PetscFunctionReturn(0);
173 #endif
174 }
175 
176 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
177  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
178  * matrix handle.
179  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
180  * there is no good alternative. */
181 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
182 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
183 {
184   PetscErrorCode      ierr;
185   sparse_status_t     stat;
186   sparse_index_base_t indexing;
187   PetscInt            nrows, ncols;
188   PetscInt            *aj,*ai,*dummy;
189   MatScalar           *aa;
190   Mat                 A;
191   Mat_SeqAIJMKL       *aijmkl;
192 
193   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
194   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
195   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
196 
197   if (reuse == MAT_REUSE_MATRIX) {
198     ierr = MatDestroy(mat);CHKERRQ(ierr);
199   }
200   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
201   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
202   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
203   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
204    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
205    * they will be destroyed when the MKL handle is destroyed.
206    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
207   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
208 
209   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
210    * Now turn it into a MATSEQAIJMKL. */
211   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
212 
213   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
214   aijmkl->csrA = csrA;
215 
216   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
217    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
218    * and just need to be able to run the MKL optimization step. */
219   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
220   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
221   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
222   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
223   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
224   stat = mkl_sparse_optimize(aijmkl->csrA);
225   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
226   aijmkl->sparse_optimized = PETSC_TRUE;
227 
228   *mat = A;
229   PetscFunctionReturn(0);
230 }
231 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
232 
233 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
234 {
235   PetscErrorCode ierr;
236   Mat_SeqAIJMKL  *aijmkl;
237   Mat_SeqAIJMKL  *aijmkl_dest;
238 
239   PetscFunctionBegin;
240   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
241   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
242   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
243   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
244   aijmkl_dest->sparse_optimized = PETSC_FALSE;
245   if (aijmkl->eager_inspection) {
246     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
252 {
253   PetscErrorCode  ierr;
254   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
255   Mat_SeqAIJMKL   *aijmkl;
256 
257   PetscFunctionBegin;
258   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
259 
260   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
261    * extra information and some different methods, call the AssemblyEnd
262    * routine for a MATSEQAIJ.
263    * I'm not sure if this is the best way to do this, but it avoids
264    * a lot of code duplication. */
265   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
266   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
267 
268   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
269    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
270   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
271   if (aijmkl->eager_inspection) {
272     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
273 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
274   } else if (aijmkl->sparse_optimized) {
275     /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
276      * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
277      * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
278      * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
279     sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
280     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
281     aijmkl->sparse_optimized = PETSC_FALSE;
282 #endif
283   }
284 
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
289 {
290   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
291   const PetscScalar *x;
292   PetscScalar       *y;
293   const MatScalar   *aa;
294   PetscErrorCode    ierr;
295   PetscInt          m=A->rmap->n;
296   PetscInt          n=A->cmap->n;
297   PetscScalar       alpha = 1.0;
298   PetscScalar       beta = 0.0;
299   const PetscInt    *aj,*ai;
300   char              matdescra[6];
301 
302 
303   /* Variables not in MatMult_SeqAIJ. */
304   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
305 
306   PetscFunctionBegin;
307   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
308   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
309   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
310   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
311   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
312   aa   = a->a;  /* Nonzero elements stored row-by-row. */
313   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
314 
315   /* Call MKL sparse BLAS routine to do the MatMult. */
316   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
317 
318   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
319   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
320   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
325 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
326 {
327   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
328   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
329   const PetscScalar *x;
330   PetscScalar       *y;
331   PetscErrorCode    ierr;
332   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
333 
334   PetscFunctionBegin;
335 
336   /* If there are no nonzero entries, zero yy and return immediately. */
337   if(!a->nz) {
338     PetscInt i;
339     PetscInt m=A->rmap->n;
340     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
341     for (i=0; i<m; i++) {
342       y[i] = 0.0;
343     }
344     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
345     PetscFunctionReturn(0);
346   }
347 
348   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
350 
351   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
352    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
353    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
354   if (!aijmkl->sparse_optimized) {
355     MatSeqAIJMKL_create_mkl_handle(A);
356   }
357 
358   /* Call MKL SpMV2 executor routine to do the MatMult. */
359   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
360   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
361 
362   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
363   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
364   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
365   PetscFunctionReturn(0);
366 }
367 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
368 
369 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
370 {
371   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
372   const PetscScalar *x;
373   PetscScalar       *y;
374   const MatScalar   *aa;
375   PetscErrorCode    ierr;
376   PetscInt          m=A->rmap->n;
377   PetscInt          n=A->cmap->n;
378   PetscScalar       alpha = 1.0;
379   PetscScalar       beta = 0.0;
380   const PetscInt    *aj,*ai;
381   char              matdescra[6];
382 
383   /* Variables not in MatMultTranspose_SeqAIJ. */
384   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
385 
386   PetscFunctionBegin;
387   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
388   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
389   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
390   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
391   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
392   aa   = a->a;  /* Nonzero elements stored row-by-row. */
393   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
394 
395   /* Call MKL sparse BLAS routine to do the MatMult. */
396   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
397 
398   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
399   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
400   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
405 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
406 {
407   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
408   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
409   const PetscScalar *x;
410   PetscScalar       *y;
411   PetscErrorCode    ierr;
412   sparse_status_t   stat;
413 
414   PetscFunctionBegin;
415 
416   /* If there are no nonzero entries, zero yy and return immediately. */
417   if(!a->nz) {
418     PetscInt i;
419     PetscInt n=A->cmap->n;
420     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
421     for (i=0; i<n; i++) {
422       y[i] = 0.0;
423     }
424     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
425     PetscFunctionReturn(0);
426   }
427 
428   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
429   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
430 
431   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
432    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
433    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
434   if (!aijmkl->sparse_optimized) {
435     MatSeqAIJMKL_create_mkl_handle(A);
436   }
437 
438   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
439   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
440   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
441 
442   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
443   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
448 
449 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
450 {
451   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
452   const PetscScalar *x;
453   PetscScalar       *y,*z;
454   const MatScalar   *aa;
455   PetscErrorCode    ierr;
456   PetscInt          m=A->rmap->n;
457   PetscInt          n=A->cmap->n;
458   const PetscInt    *aj,*ai;
459   PetscInt          i;
460 
461   /* Variables not in MatMultAdd_SeqAIJ. */
462   char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
463   PetscScalar       alpha = 1.0;
464   PetscScalar       beta;
465   char              matdescra[6];
466 
467   PetscFunctionBegin;
468   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
469   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
470 
471   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
472   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
473   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
474   aa   = a->a;  /* Nonzero elements stored row-by-row. */
475   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
476 
477   /* Call MKL sparse BLAS routine to do the MatMult. */
478   if (zz == yy) {
479     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
480     beta = 1.0;
481     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
482   } else {
483     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
484      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
485     beta = 0.0;
486     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
487     for (i=0; i<m; i++) {
488       z[i] += y[i];
489     }
490   }
491 
492   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
493   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
494   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
499 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
500 {
501   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
502   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
503   const PetscScalar *x;
504   PetscScalar       *y,*z;
505   PetscErrorCode    ierr;
506   PetscInt          m=A->rmap->n;
507   PetscInt          i;
508 
509   /* Variables not in MatMultAdd_SeqAIJ. */
510   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
511 
512   PetscFunctionBegin;
513 
514   /* If there are no nonzero entries, set zz = yy and return immediately. */
515   if(!a->nz) {
516     PetscInt i;
517     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
518     for (i=0; i<m; i++) {
519       z[i] = y[i];
520     }
521     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
522     PetscFunctionReturn(0);
523   }
524 
525   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
526   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
527 
528   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
529    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
530    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
531   if (!aijmkl->sparse_optimized) {
532     MatSeqAIJMKL_create_mkl_handle(A);
533   }
534 
535   /* Call MKL sparse BLAS routine to do the MatMult. */
536   if (zz == yy) {
537     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
538      * with alpha and beta both set to 1.0. */
539     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
540     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
541   } else {
542     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
543      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
544     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
545     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
546     for (i=0; i<m; i++) {
547       z[i] += y[i];
548     }
549   }
550 
551   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
552   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
553   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
557 
558 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
559 {
560   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
561   const PetscScalar *x;
562   PetscScalar       *y,*z;
563   const MatScalar   *aa;
564   PetscErrorCode    ierr;
565   PetscInt          m=A->rmap->n;
566   PetscInt          n=A->cmap->n;
567   const PetscInt    *aj,*ai;
568   PetscInt          i;
569 
570   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
571   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
572   PetscScalar       alpha = 1.0;
573   PetscScalar       beta;
574   char              matdescra[6];
575 
576   PetscFunctionBegin;
577   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
578   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
579 
580   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
581   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
582   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
583   aa   = a->a;  /* Nonzero elements stored row-by-row. */
584   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
585 
586   /* Call MKL sparse BLAS routine to do the MatMult. */
587   if (zz == yy) {
588     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
589     beta = 1.0;
590     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
591   } else {
592     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
593      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
594     beta = 0.0;
595     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
596     for (i=0; i<n; i++) {
597       z[i] += y[i];
598     }
599   }
600 
601   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
602   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
603   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
604   PetscFunctionReturn(0);
605 }
606 
607 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
608 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
609 {
610   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
611   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
612   const PetscScalar *x;
613   PetscScalar       *y,*z;
614   PetscErrorCode    ierr;
615   PetscInt          n=A->cmap->n;
616   PetscInt          i;
617 
618   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
619   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
620 
621   PetscFunctionBegin;
622 
623   /* If there are no nonzero entries, set zz = yy and return immediately. */
624   if(!a->nz) {
625     PetscInt i;
626     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
627     for (i=0; i<n; i++) {
628       z[i] = y[i];
629     }
630     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
631     PetscFunctionReturn(0);
632   }
633 
634   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
635   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
636 
637   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
638    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
639    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
640   if (!aijmkl->sparse_optimized) {
641     MatSeqAIJMKL_create_mkl_handle(A);
642   }
643 
644   /* Call MKL sparse BLAS routine to do the MatMult. */
645   if (zz == yy) {
646     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
647      * with alpha and beta both set to 1.0. */
648     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
649     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
650   } else {
651     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
652      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
653     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
654     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
655     for (i=0; i<n; i++) {
656       z[i] += y[i];
657     }
658   }
659 
660   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
661   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
666 
667 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
668 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
669  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
670  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
671  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
672  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
673  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
674 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
675 {
676   Mat_SeqAIJMKL   *a, *b;
677   sparse_matrix_t csrA, csrB, csrC;
678   PetscErrorCode  ierr;
679   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
680 
681   PetscFunctionBegin;
682   a = (Mat_SeqAIJMKL*)A->spptr;
683   b = (Mat_SeqAIJMKL*)B->spptr;
684   if (!a->sparse_optimized) {
685     MatSeqAIJMKL_create_mkl_handle(A);
686   }
687   if (!b->sparse_optimized) {
688     MatSeqAIJMKL_create_mkl_handle(B);
689   }
690   csrA = a->csrA;
691   csrB = b->csrA;
692 
693   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
694   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
695 
696   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
697 
698   PetscFunctionReturn(0);
699 }
700 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
701 
702 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
703 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
704 {
705   Mat_SeqAIJMKL   *a, *b;
706   sparse_matrix_t csrA, csrB, csrC;
707   PetscErrorCode  ierr;
708   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
709 
710   PetscFunctionBegin;
711   a = (Mat_SeqAIJMKL*)A->spptr;
712   b = (Mat_SeqAIJMKL*)B->spptr;
713   if (!a->sparse_optimized) {
714     MatSeqAIJMKL_create_mkl_handle(A);
715   }
716   if (!b->sparse_optimized) {
717     MatSeqAIJMKL_create_mkl_handle(B);
718   }
719   csrA = a->csrA;
720   csrB = b->csrA;
721 
722   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
723   if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
724 
725   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
726 
727   PetscFunctionReturn(0);
728 }
729 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
730 
731 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
732 {
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
737   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
742 {
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
747   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
752 {
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
757   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
761 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
762 {
763   PetscErrorCode ierr;
764 
765   PetscFunctionBegin;
766   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
767   if (str == SAME_NONZERO_PATTERN) {
768     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
769     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
775  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
776  * routine, but can also be used to convert an assembled SeqAIJ matrix
777  * into a SeqAIJMKL one. */
778 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
779 {
780   PetscErrorCode ierr;
781   Mat            B = *newmat;
782   Mat_SeqAIJMKL  *aijmkl;
783   PetscBool      set;
784   PetscBool      sametype;
785 
786   PetscFunctionBegin;
787   if (reuse == MAT_INITIAL_MATRIX) {
788     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
789   }
790 
791   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
792   if (sametype) PetscFunctionReturn(0);
793 
794   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
795   B->spptr = (void*) aijmkl;
796 
797   /* Set function pointers for methods that we inherit from AIJ but override.
798    * We also parse some command line options below, since those determine some of the methods we point to. */
799   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
800   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
801   B->ops->destroy          = MatDestroy_SeqAIJMKL;
802 
803   aijmkl->sparse_optimized = PETSC_FALSE;
804 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
805   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
806 #else
807   aijmkl->no_SpMV2 = PETSC_TRUE;
808 #endif
809   aijmkl->eager_inspection = PETSC_FALSE;
810 
811   /* Parse command line options. */
812   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
813   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
814   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
815   ierr = PetscOptionsEnd();CHKERRQ(ierr);
816 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
817   if(!aijmkl->no_SpMV2) {
818     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
819     aijmkl->no_SpMV2 = PETSC_TRUE;
820   }
821 #endif
822 
823   if(!aijmkl->no_SpMV2) {
824 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
825     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
826     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
827     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
828     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
829     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
830     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
831 #endif
832   } else {
833     B->ops->mult             = MatMult_SeqAIJMKL;
834     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
835     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
836     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
837   }
838 
839   B->ops->scale              = MatScale_SeqAIJMKL;
840   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
841   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
842   B->ops->axpy               = MatAXPY_SeqAIJMKL;
843 
844   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
848   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
849   if(!aijmkl->no_SpMV2) {
850 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
851     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
852     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
853 #endif
854   }
855 
856   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
857   *newmat = B;
858   PetscFunctionReturn(0);
859 }
860 
861 /*@C
862    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
863    This type inherits from AIJ and is largely identical, but uses sparse BLAS
864    routines from Intel MKL whenever possible.
865    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
866    operations are currently supported.
867    If the installed version of MKL supports the "SpMV2" sparse
868    inspector-executor routines, then those are used by default.
869 
870    Collective on MPI_Comm
871 
872    Input Parameters:
873 +  comm - MPI communicator, set to PETSC_COMM_SELF
874 .  m - number of rows
875 .  n - number of columns
876 .  nz - number of nonzeros per row (same for all rows)
877 -  nnz - array containing the number of nonzeros in the various rows
878          (possibly different for each row) or NULL
879 
880    Output Parameter:
881 .  A - the matrix
882 
883    Options Database Keys:
884 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
885 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
886 
887    Notes:
888    If nnz is given then nz is ignored
889 
890    Level: intermediate
891 
892 .keywords: matrix, MKL, sparse, parallel
893 
894 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
895 @*/
896 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
897 {
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatCreate(comm,A);CHKERRQ(ierr);
902   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
903   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
904   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
909 {
910   PetscErrorCode ierr;
911 
912   PetscFunctionBegin;
913   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
914   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917