xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
/* MKL-specific state for a MATSEQAIJMKL matrix; an instance of this struct is
 * hung off the matrix via Mat->spptr (see the casts of A->spptr throughout
 * this file).  The AIJ arrays themselves live in the base Mat_SeqAIJ data. */
typedef struct {
  PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr; /* Matrix descriptor (type/mode/diag) passed to the SpMV2 executor routines. */
#endif
} Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
/* Convert a MATSEQAIJMKL matrix back to its base MATSEQAIJ type by restoring the
 * SeqAIJ method table, removing the composed AIJMKL-specific functions, destroying
 * the MKL sparse handle (if any), and freeing the Mat_SeqAIJMKL context. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  PetscErrorCode ierr;
  Mat            B       = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Convert a duplicate of A, leaving A itself untouched. */
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
  B->ops->scale            = MatScale_SeqAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
  B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
  B->ops->axpy             = MatAXPY_SeqAIJ;

  /* Drop the AIJMKL-specific composed functions installed at conversion time. */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if(!aijmkl->no_SpMV2) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  /* In the MAT_INITIAL_MATRIX case, B is a fresh duplicate, so re-read the
   * context from B's spptr rather than using the one taken from A above. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) {
      /* NOTE(review): returns a bare error code with no SETERRQ message. */
      PetscFunctionReturn(PETSC_ERR_LIB);
    }
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  ierr = PetscFree(B->spptr);CHKERRQ(ierr);

  /* Change the type of B to MATSEQAIJ. */
  ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);

  *newmat = B;
  PetscFunctionReturn(0);
}
88 
/* Destroy a MATSEQAIJMKL matrix: release the MKL sparse handle (if one was
 * built), free the Mat_SeqAIJMKL context, then fall through to the base
 * SeqAIJ destructor for the remaining data. */
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  PetscFunctionBegin;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) {
        /* NOTE(review): bare error return; no SETERRQ message is produced here. */
        PetscFunctionReturn(PETSC_ERR_LIB);
      }
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    ierr = PetscFree(A->spptr);CHKERRQ(ierr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
121 
122 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
123  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
124  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
125  * handle, creates a new one, and then calls mkl_sparse_optimize().
126  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
127  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
128  * an unoptimized matrix handle here. */
129 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
130 {
131 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
132   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
133    * does nothing. We make it callable anyway in this case because it cuts
134    * down on littering the code with #ifdefs. */
135   PetscFunctionBegin;
136   PetscFunctionReturn(0);
137 #else
138   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
139   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
140   PetscInt        m,n;
141   MatScalar       *aa;
142   PetscInt        *aj,*ai;
143   sparse_status_t stat;
144 
145   PetscFunctionBegin;
146   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
147 
148   if (aijmkl->sparse_optimized) {
149     /* Matrix has been previously assembled and optimized. Must destroy old
150      * matrix handle before running the optimization step again. */
151     stat = mkl_sparse_destroy(aijmkl->csrA);
152     if (stat != SPARSE_STATUS_SUCCESS) {
153       PetscFunctionReturn(PETSC_ERR_LIB);
154     }
155   }
156   aijmkl->sparse_optimized = PETSC_FALSE;
157 
158   /* Now perform the SpMV2 setup and matrix optimization. */
159   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
160   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
161   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
162   m = A->rmap->n;
163   n = A->cmap->n;
164   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
165   aa   = a->a;  /* Nonzero elements stored row-by-row. */
166   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
167   if ((a->nz!=0) & !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
171     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
172     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
173     stat = mkl_sparse_optimize(aijmkl->csrA);
174     if (stat != SPARSE_STATUS_SUCCESS) {
175       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
176       PetscFunctionReturn(PETSC_ERR_LIB);
177     }
178     aijmkl->sparse_optimized = PETSC_TRUE;
179   }
180 
181   PetscFunctionReturn(0);
182 #endif
183 }
184 
185 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
186  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
187  * matrix handle.
188  * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
189  * there is no good alternative. */
190 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
191 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
192 {
193   PetscErrorCode ierr;
194   sparse_status_t stat;
195   sparse_index_base_t indexing;
196   PetscInt nrows, ncols;
197   PetscInt *aj,*ai,*dummy;
198   MatScalar *aa;
199   Mat A;
200   Mat_SeqAIJ *a;
201   Mat_SeqAIJMKL *aijmkl;
202 
203   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
204   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
205   if (stat != SPARSE_STATUS_SUCCESS) {
206     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
207     PetscFunctionReturn(PETSC_ERR_LIB);
208   }
209 
210   if (reuse == MAT_REUSE_MATRIX) {
211     ierr = MatDestroy(mat);CHKERRQ(ierr);
212   }
213   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
214   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
215   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
216   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
217    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
218    * they will be destroyed when the MKL handle is destroyed.
219    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
220   ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
221 
222   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
223    * Now turn it into a MATSEQAIJMKL. */
224   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
225 
226   a = (Mat_SeqAIJ*)A->data;
227   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
228   aijmkl->csrA = csrA;
229 
230   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
231    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
232    * and just need to be able to run the MKL optimization step. */
233   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
234   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
235   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
236   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
237   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
238   stat = mkl_sparse_optimize(aijmkl->csrA);
239   if (stat != SPARSE_STATUS_SUCCESS) {
240     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
241     PetscFunctionReturn(PETSC_ERR_LIB);
242   }
243   aijmkl->sparse_optimized = PETSC_TRUE;
244 
245   *mat = A;
246   PetscFunctionReturn(0);
247 }
248 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
249 
250 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
251 {
252   PetscErrorCode ierr;
253   Mat_SeqAIJMKL *aijmkl;
254   Mat_SeqAIJMKL *aijmkl_dest;
255 
256   PetscFunctionBegin;
257   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
258   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
259   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
260   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
261   aijmkl_dest->sparse_optimized = PETSC_FALSE;
262   if (aijmkl->eager_inspection) {
263     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
264   }
265   PetscFunctionReturn(0);
266 }
267 
/* Finish assembly of a MATSEQAIJMKL matrix: run the base SeqAIJ assembly, then
 * either eagerly build the MKL handle or (lazy mode) invalidate any stale one. */
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL *aijmkl;

  PetscFunctionBegin;
  /* Nothing to do for intermediate (flush) assemblies. */
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  } else if (aijmkl->sparse_optimized) {
    /* If doing lazy inspection and there is an optimized MKL handle, we need to destroy it, so that it will be
     * rebuilt later when needed. Otherwise, some SeqAIJ implementations that we depend on for some operations
     * (such as MatMatMultNumeric()) can modify the result matrix without the matrix handle being rebuilt.
     * (The SeqAIJ version MatMatMultNumeric() knows nothing about matrix handles, but it *does* call MatAssemblyEnd().) */
    sparse_status_t stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) {
      /* NOTE(review): bare error return; no SETERRQ message is produced here. */
      PetscFunctionReturn(PETSC_ERR_LIB);
    }
    aijmkl->sparse_optimized = PETSC_FALSE;
#endif
  }

  PetscFunctionReturn(0);
}
306 
307 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
308 {
309   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
310   const PetscScalar *x;
311   PetscScalar       *y;
312   const MatScalar   *aa;
313   PetscErrorCode    ierr;
314   PetscInt          m=A->rmap->n;
315   PetscInt          n=A->cmap->n;
316   PetscScalar       alpha = 1.0;
317   PetscScalar       beta = 0.0;
318   const PetscInt    *aj,*ai;
319   char              matdescra[6];
320 
321 
322   /* Variables not in MatMult_SeqAIJ. */
323   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
324 
325   PetscFunctionBegin;
326   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
327   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
328   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
329   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
330   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
331   aa   = a->a;  /* Nonzero elements stored row-by-row. */
332   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
333 
334   /* Call MKL sparse BLAS routine to do the MatMult. */
335   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
336 
337   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
338   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
344 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
345 {
346   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
347   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
348   const PetscScalar *x;
349   PetscScalar       *y;
350   PetscErrorCode    ierr;
351   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
352 
353   PetscFunctionBegin;
354 
355   /* If there are no nonzero entries, zero yy and return immediately. */
356   if(!a->nz) {
357     PetscInt i;
358     PetscInt m=A->rmap->n;
359     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
360     for (i=0; i<m; i++) {
361       y[i] = 0.0;
362     }
363     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
364     PetscFunctionReturn(0);
365   }
366 
367   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
368   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
369 
370   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
371    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
372    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
373   if (!aijmkl->sparse_optimized) {
374     MatSeqAIJMKL_create_mkl_handle(A);
375   }
376 
377   /* Call MKL SpMV2 executor routine to do the MatMult. */
378   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
379 
380   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
381   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
382   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
383   if (stat != SPARSE_STATUS_SUCCESS) {
384     PetscFunctionReturn(PETSC_ERR_LIB);
385   }
386   PetscFunctionReturn(0);
387 }
388 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
389 
390 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
391 {
392   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
393   const PetscScalar *x;
394   PetscScalar       *y;
395   const MatScalar   *aa;
396   PetscErrorCode    ierr;
397   PetscInt          m=A->rmap->n;
398   PetscInt          n=A->cmap->n;
399   PetscScalar       alpha = 1.0;
400   PetscScalar       beta = 0.0;
401   const PetscInt    *aj,*ai;
402   char              matdescra[6];
403 
404   /* Variables not in MatMultTranspose_SeqAIJ. */
405   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
406 
407   PetscFunctionBegin;
408   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
409   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
410   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
412   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
413   aa   = a->a;  /* Nonzero elements stored row-by-row. */
414   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
415 
416   /* Call MKL sparse BLAS routine to do the MatMult. */
417   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
418 
419   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
420   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
421   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
426 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
427 {
428   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
429   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
430   const PetscScalar *x;
431   PetscScalar       *y;
432   PetscErrorCode    ierr;
433   sparse_status_t   stat;
434 
435   PetscFunctionBegin;
436 
437   /* If there are no nonzero entries, zero yy and return immediately. */
438   if(!a->nz) {
439     PetscInt i;
440     PetscInt n=A->cmap->n;
441     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
442     for (i=0; i<n; i++) {
443       y[i] = 0.0;
444     }
445     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
446     PetscFunctionReturn(0);
447   }
448 
449   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
450   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
451 
452   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
453    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
454    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
455   if (!aijmkl->sparse_optimized) {
456     MatSeqAIJMKL_create_mkl_handle(A);
457   }
458 
459   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
460   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
461 
462   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
463   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
464   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
465   if (stat != SPARSE_STATUS_SUCCESS) {
466     PetscFunctionReturn(PETSC_ERR_LIB);
467   }
468   PetscFunctionReturn(0);
469 }
470 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
471 
/* Compute zz = A*xx + yy using MKL's classic sparse BLAS CSR kernel. */
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  PetscFunctionBegin;
  /* NOTE(review): matdescra[1] and matdescra[2] are never set -- presumably MKL
   * ignores them for a 'g' (general) matrix; confirm against the MKL docs. */
  matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
520 
521 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
522 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
523 {
524   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
525   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
526   const PetscScalar *x;
527   PetscScalar       *y,*z;
528   PetscErrorCode    ierr;
529   PetscInt          m=A->rmap->n;
530   PetscInt          i;
531 
532   /* Variables not in MatMultAdd_SeqAIJ. */
533   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
534 
535   PetscFunctionBegin;
536 
537   /* If there are no nonzero entries, set zz = yy and return immediately. */
538   if(!a->nz) {
539     PetscInt i;
540     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
541     for (i=0; i<m; i++) {
542       z[i] = y[i];
543     }
544     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
545     PetscFunctionReturn(0);
546   }
547 
548   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
549   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
550 
551   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
552    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
553    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
554   if (!aijmkl->sparse_optimized) {
555     MatSeqAIJMKL_create_mkl_handle(A);
556   }
557 
558   /* Call MKL sparse BLAS routine to do the MatMult. */
559   if (zz == yy) {
560     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
561      * with alpha and beta both set to 1.0. */
562     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
563   } else {
564     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
565      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
566     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
567     for (i=0; i<m; i++) {
568       z[i] += y[i];
569     }
570   }
571 
572   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
573   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
574   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
575   if (stat != SPARSE_STATUS_SUCCESS) {
576     PetscFunctionReturn(PETSC_ERR_LIB);
577   }
578   PetscFunctionReturn(0);
579 }
580 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
581 
/* Compute zz = A^T*xx + yy using MKL's classic sparse BLAS CSR kernel. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  PetscFunctionBegin;
  /* NOTE(review): matdescra[1] and matdescra[2] are never set -- presumably MKL
   * ignores them for a 'g' (general) matrix; confirm against the MKL docs. */
  matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    /* The transpose product has length n (cmap->n), so the add loop runs over n. */
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
630 
631 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
632 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
633 {
634   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
635   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
636   const PetscScalar *x;
637   PetscScalar       *y,*z;
638   PetscErrorCode    ierr;
639   PetscInt          n=A->cmap->n;
640   PetscInt          i;
641 
642   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
643   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
644 
645   PetscFunctionBegin;
646 
647   /* If there are no nonzero entries, set zz = yy and return immediately. */
648   if(!a->nz) {
649     PetscInt i;
650     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
651     for (i=0; i<n; i++) {
652       z[i] = y[i];
653     }
654     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
655     PetscFunctionReturn(0);
656   }
657 
658   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
659   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
660 
661   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
662    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
663    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
664   if (!aijmkl->sparse_optimized) {
665     MatSeqAIJMKL_create_mkl_handle(A);
666   }
667 
668   /* Call MKL sparse BLAS routine to do the MatMult. */
669   if (zz == yy) {
670     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
671      * with alpha and beta both set to 1.0. */
672     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
673   } else {
674     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
675      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
676     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
677     for (i=0; i<n; i++) {
678       z[i] += y[i];
679     }
680   }
681 
682   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
683   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
684   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
685   if (stat != SPARSE_STATUS_SUCCESS) {
686     PetscFunctionReturn(PETSC_ERR_LIB);
687   }
688   PetscFunctionReturn(0);
689 }
690 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
691 
692 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
693 /* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
694  * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
695  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
696  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
697  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
698  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
699 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
700 {
701   Mat_SeqAIJMKL *a, *b;
702   sparse_matrix_t csrA, csrB, csrC;
703   PetscErrorCode ierr;
704   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
705 
706   PetscFunctionBegin;
707   a = (Mat_SeqAIJMKL*)A->spptr;
708   b = (Mat_SeqAIJMKL*)B->spptr;
709   if (!a->sparse_optimized) {
710     MatSeqAIJMKL_create_mkl_handle(A);
711   }
712   if (!b->sparse_optimized) {
713     MatSeqAIJMKL_create_mkl_handle(B);
714   }
715   csrA = a->csrA;
716   csrB = b->csrA;
717 
718   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
719   if (stat != SPARSE_STATUS_SUCCESS) {
720     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
721     PetscFunctionReturn(PETSC_ERR_LIB);
722   }
723 
724   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
725 
726   PetscFunctionReturn(0);
727 }
728 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
729 
730 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
731 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
732 {
733   Mat_SeqAIJMKL *a, *b;
734   sparse_matrix_t csrA, csrB, csrC;
735   PetscErrorCode ierr;
736   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
737 
738   PetscFunctionBegin;
739   a = (Mat_SeqAIJMKL*)A->spptr;
740   b = (Mat_SeqAIJMKL*)B->spptr;
741   if (!a->sparse_optimized) {
742     MatSeqAIJMKL_create_mkl_handle(A);
743   }
744   if (!b->sparse_optimized) {
745     MatSeqAIJMKL_create_mkl_handle(B);
746   }
747   csrA = a->csrA;
748   csrB = b->csrA;
749 
750   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
751   if (stat != SPARSE_STATUS_SUCCESS) {
752     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
753     PetscFunctionReturn(PETSC_ERR_LIB);
754   }
755 
756   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
757 
758   PetscFunctionReturn(0);
759 }
760 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
761 
762 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
763 {
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
768   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
773 {
774   PetscErrorCode ierr;
775 
776   PetscFunctionBegin;
777   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
778   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
783 {
784   PetscErrorCode ierr;
785 
786   PetscFunctionBegin;
787   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
788   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
793 {
794   PetscErrorCode ierr;
795 
796   PetscFunctionBegin;
797   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
798   if (str == SAME_NONZERO_PATTERN) {
799     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
800     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
801   }
802   PetscFunctionReturn(0);
803 }
804 
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
809 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
810 {
811   PetscErrorCode ierr;
812   Mat            B = *newmat;
813   Mat_SeqAIJMKL  *aijmkl;
814   PetscBool      set;
815   PetscBool      sametype;
816 
817   PetscFunctionBegin;
818   if (reuse == MAT_INITIAL_MATRIX) {
819     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
820   }
821 
822   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
823   if (sametype) PetscFunctionReturn(0);
824 
825   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
826   B->spptr = (void*) aijmkl;
827 
828   /* Set function pointers for methods that we inherit from AIJ but override.
829    * We also parse some command line options below, since those determine some of the methods we point to. */
830   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
831   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
832   B->ops->destroy          = MatDestroy_SeqAIJMKL;
833 
834   aijmkl->sparse_optimized = PETSC_FALSE;
835 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
836   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
837 #else
838   aijmkl->no_SpMV2 = PETSC_TRUE;
839 #endif
840   aijmkl->eager_inspection = PETSC_FALSE;
841 
842   /* Parse command line options. */
843   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
844   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
845   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
846   ierr = PetscOptionsEnd();CHKERRQ(ierr);
847 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
848   if(!aijmkl->no_SpMV2) {
849     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
850     aijmkl->no_SpMV2 = PETSC_TRUE;
851   }
852 #endif
853 
854   if(!aijmkl->no_SpMV2) {
855 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
856     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
857     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
858     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
859     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
860     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
861     B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
862 #endif
863   } else {
864     B->ops->mult             = MatMult_SeqAIJMKL;
865     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
866     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
867     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
868   }
869 
870   B->ops->scale              = MatScale_SeqAIJMKL;
871   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
872   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
873   B->ops->axpy               = MatAXPY_SeqAIJMKL;
874 
875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
877   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
878   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
879   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
880   if(!aijmkl->no_SpMV2) {
881 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
882     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
883     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
884 #endif
885   }
886 
887   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
888   *newmat = B;
889   PetscFunctionReturn(0);
890 }
891 
892 /*@C
893    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
894    This type inherits from AIJ and is largely identical, but uses sparse BLAS
895    routines from Intel MKL whenever possible.
896    MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, and MatTransposeMatMult
897    operations are currently supported.
898    If the installed version of MKL supports the "SpMV2" sparse
899    inspector-executor routines, then those are used by default.
900 
901    Collective on MPI_Comm
902 
903    Input Parameters:
904 +  comm - MPI communicator, set to PETSC_COMM_SELF
905 .  m - number of rows
906 .  n - number of columns
907 .  nz - number of nonzeros per row (same for all rows)
908 -  nnz - array containing the number of nonzeros in the various rows
909          (possibly different for each row) or NULL
910 
911    Output Parameter:
912 .  A - the matrix
913 
914    Options Database Keys:
915 +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
916 -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied
917 
918    Notes:
919    If nnz is given then nz is ignored
920 
921    Level: intermediate
922 
923 .keywords: matrix, MKL, sparse, parallel
924 
925 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
926 @*/
927 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
928 {
929   PetscErrorCode ierr;
930 
931   PetscFunctionBegin;
932   ierr = MatCreate(comm,A);CHKERRQ(ierr);
933   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
934   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
935   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
940 {
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
945   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948