xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision f3fd175839bd4670c4525ba579f9e8a22bd71c7b)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 typedef struct {
16   PetscBool no_SpMV2;  /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
17   PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
18   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
19 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
20   sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
21   struct matrix_descr descr;
22 #endif
23 } Mat_SeqAIJMKL;
24 
25 extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
26 
27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
28 {
29   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30   /* so we will ignore 'MatType type'. */
31   PetscErrorCode ierr;
32   Mat            B       = *newmat;
33 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
34   Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
35 #endif
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
45   B->ops->destroy          = MatDestroy_SeqAIJ;
46   B->ops->mult             = MatMult_SeqAIJ;
47   B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
48   B->ops->multadd          = MatMultAdd_SeqAIJ;
49   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50   B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
51   B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
52   B->ops->scale            = MatScale_SeqAIJ;
53   B->ops->diagonalscale    = MatDiagonalScale_SeqAIJ;
54   B->ops->diagonalset      = MatDiagonalSet_SeqAIJ;
55   B->ops->axpy             = MatAXPY_SeqAIJ;
56 
57   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);CHKERRQ(ierr);
58   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
59   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
60   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);CHKERRQ(ierr);
61   if(!aijmkl->no_SpMV2) {
62 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
63     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
64     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);CHKERRQ(ierr);
65 #endif
66   }
67 
68   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
69    * simply involves destroying the MKL sparse matrix handle and then freeing
70    * the spptr pointer. */
71 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
72   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;
73 
74   if (aijmkl->sparse_optimized) {
75     sparse_status_t stat;
76     stat = mkl_sparse_destroy(aijmkl->csrA);
77     if (stat != SPARSE_STATUS_SUCCESS) {
78       PetscFunctionReturn(PETSC_ERR_LIB);
79     }
80   }
81 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
82   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
83 
84   /* Change the type of B to MATSEQAIJ. */
85   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);CHKERRQ(ierr);
86 
87   *newmat = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
92 {
93   PetscErrorCode ierr;
94   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*) A->spptr;
95 
96   PetscFunctionBegin;
97 
98   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
99    * spptr pointer. */
100   if (aijmkl) {
101     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
102 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
103     if (aijmkl->sparse_optimized) {
104       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
105       stat = mkl_sparse_destroy(aijmkl->csrA);
106       if (stat != SPARSE_STATUS_SUCCESS) {
107         PetscFunctionReturn(PETSC_ERR_LIB);
108       }
109     }
110 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
111     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
112   }
113 
114   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
115    * to destroy everything that remains. */
116   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);CHKERRQ(ierr);
117   /* Note that I don't call MatSetType().  I believe this is because that
118    * is only to be called when *building* a matrix.  I could be wrong, but
119    * that is how things work for the SuperLU matrix class. */
120   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 /* MatSeqAIJKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
125  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
126  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
127  * handle, creates a new one, and then calls mkl_sparse_optimize().
128  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
129  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
130  * an unoptimized matrix handle here. */
131 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
132 {
133 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
134   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
135    * does nothing. We make it callable anyway in this case because it cuts
136    * down on littering the code with #ifdefs. */
137   PetscFunctionBegin;
138   PetscFunctionReturn(0);
139 #else
140   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
141   Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
142   PetscInt        m,n;
143   MatScalar       *aa;
144   PetscInt        *aj,*ai;
145   sparse_status_t stat;
146 
147   PetscFunctionBegin;
148   if (aijmkl->no_SpMV2) PetscFunctionReturn(0);
149 
150   if (aijmkl->sparse_optimized) {
151     /* Matrix has been previously assembled and optimized. Must destroy old
152      * matrix handle before running the optimization step again. */
153     stat = mkl_sparse_destroy(aijmkl->csrA);
154     if (stat != SPARSE_STATUS_SUCCESS) {
155       PetscFunctionReturn(PETSC_ERR_LIB);
156     }
157   }
158   aijmkl->sparse_optimized = PETSC_FALSE;
159 
160   /* Now perform the SpMV2 setup and matrix optimization. */
161   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
162   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
163   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
164   m = A->rmap->n;
165   n = A->cmap->n;
166   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
167   aa   = a->a;  /* Nonzero elements stored row-by-row. */
168   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
169   if ((a->nz!=0) & !(A->structure_only)) {
170     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
171      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
172     stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
173     stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
174     stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
175     stat = mkl_sparse_optimize(aijmkl->csrA);
176     if (stat != SPARSE_STATUS_SUCCESS) {
177       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
178       PetscFunctionReturn(PETSC_ERR_LIB);
179     }
180     aijmkl->sparse_optimized = PETSC_TRUE;
181   }
182 
183   PetscFunctionReturn(0);
184 #endif
185 }
186 
187 /* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
188  * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
189  * matrix handle.
190  * Note: This routine supports replacing the numerical values in an existing matrix that has the same sparsity
191  * structure as in the MKL handle. However, this code currently doesn't actually get used when MatMatMult()
192  * is called with MAT_REUSE_MATRIX, because the MatMatMult() interface code calls MatMatMultNumeric() in this case.
193  * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
194  * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
195  * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
196  * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
197 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
198 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
199 {
200   PetscErrorCode ierr;
201   sparse_status_t stat;
202   sparse_index_base_t indexing;
203   PetscInt nrows, ncols;
204   PetscInt *aj,*ai,*dummy;
205   MatScalar *aa;
206   Mat A;
207   Mat_SeqAIJ *a;
208   Mat_SeqAIJMKL *aijmkl;
209 
210   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
211   stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
212   if (stat != SPARSE_STATUS_SUCCESS) {
213     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");
214     PetscFunctionReturn(PETSC_ERR_LIB);
215   }
216 
217   if (reuse == MAT_INITIAL_MATRIX) {
218     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
219     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
220     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);CHKERRQ(ierr);
221     ierr = MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);CHKERRQ(ierr);
222 
223     /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
224      * Now turn it into a MATSEQAIJMKL. */
225     ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
226   } else {
227     A = *mat;
228   }
229 
230   a = (Mat_SeqAIJ*)A->data;
231   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
232 
233   if (reuse == MAT_REUSE_MATRIX) {
234     /* Need to destroy old MKL handle. */
235     stat = mkl_sparse_destroy(aijmkl->csrA);
236     if (stat != SPARSE_STATUS_SUCCESS) {
237       PetscFunctionReturn(PETSC_ERR_LIB);
238     }
239 
240     /* The new matrix is supposed to have the same sparsity pattern, so copy only the array of numerical values. */
241     ierr = PetscMemcpy(a->a,aa,sizeof(MatScalar)*a->nz);CHKERRQ(ierr);
242   }
243   aijmkl->csrA = csrA;
244 
245   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
246    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
247    * and just need to be able to run the MKL optimization step. */
248   aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
249   aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
250   aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
251   stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
252   stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
253   stat = mkl_sparse_optimize(aijmkl->csrA);
254   if (stat != SPARSE_STATUS_SUCCESS) {
255     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set hints/complete mkl_sparse_optimize");
256     PetscFunctionReturn(PETSC_ERR_LIB);
257   }
258   aijmkl->sparse_optimized = PETSC_TRUE;
259 
260   *mat = A;
261   PetscFunctionReturn(0);
262 }
263 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
264 
265 PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
266 {
267   PetscErrorCode ierr;
268   Mat_SeqAIJMKL *aijmkl;
269   Mat_SeqAIJMKL *aijmkl_dest;
270 
271   PetscFunctionBegin;
272   ierr = MatDuplicate_SeqAIJ(A,op,M);CHKERRQ(ierr);
273   aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
274   aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
275   ierr = PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));CHKERRQ(ierr);
276   aijmkl_dest->sparse_optimized = PETSC_FALSE;
277   if (aijmkl->eager_inspection) {
278     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
284 {
285   PetscErrorCode  ierr;
286   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
287   Mat_SeqAIJMKL *aijmkl;
288 
289   PetscFunctionBegin;
290   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
291 
292   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
293    * extra information and some different methods, call the AssemblyEnd
294    * routine for a MATSEQAIJ.
295    * I'm not sure if this is the best way to do this, but it avoids
296    * a lot of code duplication. */
297   a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
298   ierr = MatAssemblyEnd_SeqAIJ(A, mode);CHKERRQ(ierr);
299 
300   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
301    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
302   aijmkl = (Mat_SeqAIJMKL*) A->spptr;
303   if (aijmkl->eager_inspection) {
304     ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
305   }
306 
307   PetscFunctionReturn(0);
308 }
309 
310 PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
311 {
312   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
313   const PetscScalar *x;
314   PetscScalar       *y;
315   const MatScalar   *aa;
316   PetscErrorCode    ierr;
317   PetscInt          m=A->rmap->n;
318   PetscInt          n=A->cmap->n;
319   PetscScalar       alpha = 1.0;
320   PetscScalar       beta = 0.0;
321   const PetscInt    *aj,*ai;
322   char              matdescra[6];
323 
324 
325   /* Variables not in MatMult_SeqAIJ. */
326   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
327 
328   PetscFunctionBegin;
329   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
330   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
331   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
332   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
333   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
334   aa   = a->a;  /* Nonzero elements stored row-by-row. */
335   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
336 
337   /* Call MKL sparse BLAS routine to do the MatMult. */
338   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
339 
340   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
341   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
342   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
347 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
348 {
349   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
350   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
351   const PetscScalar *x;
352   PetscScalar       *y;
353   PetscErrorCode    ierr;
354   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
355 
356   PetscFunctionBegin;
357 
358   /* If there are no nonzero entries, zero yy and return immediately. */
359   if(!a->nz) {
360     PetscInt i;
361     PetscInt m=A->rmap->n;
362     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
363     for (i=0; i<m; i++) {
364       y[i] = 0.0;
365     }
366     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
367     PetscFunctionReturn(0);
368   }
369 
370   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
371   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
372 
373   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
374    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
375    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
376   if (!aijmkl->sparse_optimized) {
377     MatSeqAIJMKL_create_mkl_handle(A);
378   }
379 
380   /* Call MKL SpMV2 executor routine to do the MatMult. */
381   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
382 
383   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
384   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
385   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
386   if (stat != SPARSE_STATUS_SUCCESS) {
387     PetscFunctionReturn(PETSC_ERR_LIB);
388   }
389   PetscFunctionReturn(0);
390 }
391 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
392 
393 PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
394 {
395   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
396   const PetscScalar *x;
397   PetscScalar       *y;
398   const MatScalar   *aa;
399   PetscErrorCode    ierr;
400   PetscInt          m=A->rmap->n;
401   PetscInt          n=A->cmap->n;
402   PetscScalar       alpha = 1.0;
403   PetscScalar       beta = 0.0;
404   const PetscInt    *aj,*ai;
405   char              matdescra[6];
406 
407   /* Variables not in MatMultTranspose_SeqAIJ. */
408   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
409 
410   PetscFunctionBegin;
411   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
412   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
413   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
414   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
415   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
416   aa   = a->a;  /* Nonzero elements stored row-by-row. */
417   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
418 
419   /* Call MKL sparse BLAS routine to do the MatMult. */
420   mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);
421 
422   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
423   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
424   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
429 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
430 {
431   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
432   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
433   const PetscScalar *x;
434   PetscScalar       *y;
435   PetscErrorCode    ierr;
436   sparse_status_t   stat;
437 
438   PetscFunctionBegin;
439 
440   /* If there are no nonzero entries, zero yy and return immediately. */
441   if(!a->nz) {
442     PetscInt i;
443     PetscInt n=A->cmap->n;
444     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
445     for (i=0; i<n; i++) {
446       y[i] = 0.0;
447     }
448     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
449     PetscFunctionReturn(0);
450   }
451 
452   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
454 
455   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
456    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
457    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
458   if (!aijmkl->sparse_optimized) {
459     MatSeqAIJMKL_create_mkl_handle(A);
460   }
461 
462   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
463   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
464 
465   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
466   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
467   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
468   if (stat != SPARSE_STATUS_SUCCESS) {
469     PetscFunctionReturn(PETSC_ERR_LIB);
470   }
471   PetscFunctionReturn(0);
472 }
473 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
474 
475 PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
476 {
477   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
478   const PetscScalar *x;
479   PetscScalar       *y,*z;
480   const MatScalar   *aa;
481   PetscErrorCode    ierr;
482   PetscInt          m=A->rmap->n;
483   PetscInt          n=A->cmap->n;
484   const PetscInt    *aj,*ai;
485   PetscInt          i;
486 
487   /* Variables not in MatMultAdd_SeqAIJ. */
488   char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
489   PetscScalar       alpha = 1.0;
490   PetscScalar       beta;
491   char              matdescra[6];
492 
493   PetscFunctionBegin;
494   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
495   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
496 
497   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
498   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
499   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
500   aa   = a->a;  /* Nonzero elements stored row-by-row. */
501   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
502 
503   /* Call MKL sparse BLAS routine to do the MatMult. */
504   if (zz == yy) {
505     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
506     beta = 1.0;
507     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
508   } else {
509     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
510      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
511     beta = 0.0;
512     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
513     for (i=0; i<m; i++) {
514       z[i] += y[i];
515     }
516   }
517 
518   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
519   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
520   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
525 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
526 {
527   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
528   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
529   const PetscScalar *x;
530   PetscScalar       *y,*z;
531   PetscErrorCode    ierr;
532   PetscInt          m=A->rmap->n;
533   PetscInt          i;
534 
535   /* Variables not in MatMultAdd_SeqAIJ. */
536   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
537 
538   PetscFunctionBegin;
539 
540   /* If there are no nonzero entries, set zz = yy and return immediately. */
541   if(!a->nz) {
542     PetscInt i;
543     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
544     for (i=0; i<m; i++) {
545       z[i] = y[i];
546     }
547     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
548     PetscFunctionReturn(0);
549   }
550 
551   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
552   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
553 
554   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
555    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
556    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
557   if (!aijmkl->sparse_optimized) {
558     MatSeqAIJMKL_create_mkl_handle(A);
559   }
560 
561   /* Call MKL sparse BLAS routine to do the MatMult. */
562   if (zz == yy) {
563     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564      * with alpha and beta both set to 1.0. */
565     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
566   } else {
567     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
570     for (i=0; i<m; i++) {
571       z[i] += y[i];
572     }
573   }
574 
575   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
576   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
577   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
578   if (stat != SPARSE_STATUS_SUCCESS) {
579     PetscFunctionReturn(PETSC_ERR_LIB);
580   }
581   PetscFunctionReturn(0);
582 }
583 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
584 
585 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
588   const PetscScalar *x;
589   PetscScalar       *y,*z;
590   const MatScalar   *aa;
591   PetscErrorCode    ierr;
592   PetscInt          m=A->rmap->n;
593   PetscInt          n=A->cmap->n;
594   const PetscInt    *aj,*ai;
595   PetscInt          i;
596 
597   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
598   char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
599   PetscScalar       alpha = 1.0;
600   PetscScalar       beta;
601   char              matdescra[6];
602 
603   PetscFunctionBegin;
604   matdescra[0] = 'g';  /* Indicates to MKL that we using a general CSR matrix. */
605   matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
606 
607   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
608   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
609   aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
610   aa   = a->a;  /* Nonzero elements stored row-by-row. */
611   ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
612 
613   /* Call MKL sparse BLAS routine to do the MatMult. */
614   if (zz == yy) {
615     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
616     beta = 1.0;
617     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
618   } else {
619     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
620      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
621     beta = 0.0;
622     mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
623     for (i=0; i<n; i++) {
624       z[i] += y[i];
625     }
626   }
627 
628   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
629   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
630   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
635 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
638   Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
639   const PetscScalar *x;
640   PetscScalar       *y,*z;
641   PetscErrorCode    ierr;
642   PetscInt          n=A->cmap->n;
643   PetscInt          i;
644 
645   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
646   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
647 
648   PetscFunctionBegin;
649 
650   /* If there are no nonzero entries, set zz = yy and return immediately. */
651   if(!a->nz) {
652     PetscInt i;
653     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
654     for (i=0; i<n; i++) {
655       z[i] = y[i];
656     }
657     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
658     PetscFunctionReturn(0);
659   }
660 
661   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
662   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
663 
664   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
665    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
666    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
667   if (!aijmkl->sparse_optimized) {
668     MatSeqAIJMKL_create_mkl_handle(A);
669   }
670 
671   /* Call MKL sparse BLAS routine to do the MatMult. */
672   if (zz == yy) {
673     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
674      * with alpha and beta both set to 1.0. */
675     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
676   } else {
677     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
678      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
679     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
680     for (i=0; i<n; i++) {
681       z[i] += y[i];
682     }
683   }
684 
685   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
686   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
687   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
688   if (stat != SPARSE_STATUS_SUCCESS) {
689     PetscFunctionReturn(PETSC_ERR_LIB);
690   }
691   PetscFunctionReturn(0);
692 }
693 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
694 
695 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
696 PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
697 {
698   Mat_SeqAIJMKL *a, *b;
699   sparse_matrix_t csrA, csrB, csrC;
700   PetscErrorCode ierr;
701   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
702 
703   PetscFunctionBegin;
704   a = (Mat_SeqAIJMKL*)A->spptr;
705   b = (Mat_SeqAIJMKL*)B->spptr;
706   if (!a->sparse_optimized) {
707     MatSeqAIJMKL_create_mkl_handle(A);
708   }
709   if (!b->sparse_optimized) {
710     MatSeqAIJMKL_create_mkl_handle(B);
711   }
712   csrA = a->csrA;
713   csrB = b->csrA;
714 
715   stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
716   if (stat != SPARSE_STATUS_SUCCESS) {
717     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
718     PetscFunctionReturn(PETSC_ERR_LIB);
719   }
720 
721   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
722 
723   PetscFunctionReturn(0);
724 }
725 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
726 
727 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
728 PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat*C)
729 {
730   Mat_SeqAIJMKL *a, *b;
731   sparse_matrix_t csrA, csrB, csrC;
732   PetscErrorCode ierr;
733   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
734 
735   PetscFunctionBegin;
736   a = (Mat_SeqAIJMKL*)A->spptr;
737   b = (Mat_SeqAIJMKL*)B->spptr;
738   if (!a->sparse_optimized) {
739     MatSeqAIJMKL_create_mkl_handle(A);
740   }
741   if (!b->sparse_optimized) {
742     MatSeqAIJMKL_create_mkl_handle(B);
743   }
744   csrA = a->csrA;
745   csrB = b->csrA;
746 
747   stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
748   if (stat != SPARSE_STATUS_SUCCESS) {
749     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");
750     PetscFunctionReturn(PETSC_ERR_LIB);
751   }
752 
753   ierr = MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);CHKERRQ(ierr);
754 
755   PetscFunctionReturn(0);
756 }
757 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
758 
759 PetscErrorCode MatScale_SeqAIJMKL(Mat inA,PetscScalar alpha)
760 {
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   ierr = MatScale_SeqAIJ(inA,alpha);CHKERRQ(ierr);
765   ierr = MatSeqAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768 
769 PetscErrorCode MatDiagonalScale_SeqAIJMKL(Mat A,Vec ll,Vec rr)
770 {
771   PetscErrorCode ierr;
772 
773   PetscFunctionBegin;
774   ierr = MatDiagonalScale_SeqAIJ(A,ll,rr);CHKERRQ(ierr);
775   ierr = MatSeqAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 
779 PetscErrorCode MatDiagonalSet_SeqAIJMKL(Mat Y,Vec D,InsertMode is)
780 {
781   PetscErrorCode ierr;
782 
783   PetscFunctionBegin;
784   ierr = MatDiagonalSet_SeqAIJ(Y,D,is);CHKERRQ(ierr);
785   ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
786   PetscFunctionReturn(0);
787 }
788 
789 PetscErrorCode MatAXPY_SeqAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
790 {
791   PetscErrorCode ierr;
792 
793   PetscFunctionBegin;
794   ierr = MatAXPY_SeqAIJ(Y,a,X,str);CHKERRQ(ierr);
795   if (str == SAME_NONZERO_PATTERN) {
796     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
797     ierr = MatSeqAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
803  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLAIJ()
804  * routine, but can also be used to convert an assembled SeqAIJ matrix
805  * into a SeqAIJMKL one. */
806 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
807 {
808   PetscErrorCode ierr;
809   Mat            B = *newmat;
810   Mat_SeqAIJMKL  *aijmkl;
811   PetscBool      set;
812   PetscBool      sametype;
813 
814   PetscFunctionBegin;
815   if (reuse == MAT_INITIAL_MATRIX) {
816     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
817   }
818 
819   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
820   if (sametype) PetscFunctionReturn(0);
821 
822   ierr     = PetscNewLog(B,&aijmkl);CHKERRQ(ierr);
823   B->spptr = (void*) aijmkl;
824 
825   /* Set function pointers for methods that we inherit from AIJ but override.
826    * We also parse some command line options below, since those determine some of the methods we point to. */
827   B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
828   B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
829   B->ops->destroy          = MatDestroy_SeqAIJMKL;
830 
831   aijmkl->sparse_optimized = PETSC_FALSE;
832 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
833   aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
834 #else
835   aijmkl->no_SpMV2 = PETSC_TRUE;
836 #endif
837   aijmkl->eager_inspection = PETSC_FALSE;
838 
839   /* Parse command line options. */
840   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");CHKERRQ(ierr);
841   ierr = PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);CHKERRQ(ierr);
842   ierr = PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);CHKERRQ(ierr);
843   ierr = PetscOptionsEnd();CHKERRQ(ierr);
844 #ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
845   if(!aijmkl->no_SpMV2) {
846     ierr = PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n");
847     aijmkl->no_SpMV2 = PETSC_TRUE;
848   }
849 #endif
850 
851   if(!aijmkl->no_SpMV2) {
852 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
853     B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
854     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
855     B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
856     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
857     B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
858     B->ops->mattransposemult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
859 #endif
860   } else {
861     B->ops->mult             = MatMult_SeqAIJMKL;
862     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
863     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
864     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
865   }
866 
867   B->ops->scale              = MatScale_SeqAIJMKL;
868   B->ops->diagonalscale      = MatDiagonalScale_SeqAIJMKL;
869   B->ops->diagonalset        = MatDiagonalSet_SeqAIJMKL;
870   B->ops->axpy               = MatAXPY_SeqAIJMKL;
871 
872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqAIJMKL_C",MatScale_SeqAIJMKL);CHKERRQ(ierr);
873   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);CHKERRQ(ierr);
874   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);CHKERRQ(ierr);
875   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);CHKERRQ(ierr);
876   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);CHKERRQ(ierr);
877   if(!aijmkl->no_SpMV2) {
878 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
879     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
880     ierr = PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);CHKERRQ(ierr);
881 #endif
882   }
883 
884   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);CHKERRQ(ierr);
885   *newmat = B;
886   PetscFunctionReturn(0);
887 }
888 
889 /*@C
890    MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
891    This type inherits from AIJ and is largely identical, but uses sparse BLAS
892    routines from Intel MKL whenever possible.
893    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
894    operations are currently supported.
895    If the installed version of MKL supports the "SpMV2" sparse
896    inspector-executor routines, then those are used by default.
897 
898    Collective on MPI_Comm
899 
900    Input Parameters:
901 +  comm - MPI communicator, set to PETSC_COMM_SELF
902 .  m - number of rows
903 .  n - number of columns
904 .  nz - number of nonzeros per row (same for all rows)
905 -  nnz - array containing the number of nonzeros in the various rows
906          (possibly different for each row) or NULL
907 
908    Output Parameter:
909 .  A - the matrix
910 
911    Options Database Keys:
912 .  -mat_aijmkl_no_spmv2 - disables use of the SpMV2 inspector-executor routines
913 
914    Notes:
915    If nnz is given then nz is ignored
916 
917    Level: intermediate
918 
919 .keywords: matrix, MKL, sparse, parallel
920 
921 .seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
922 @*/
923 PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
924 {
925   PetscErrorCode ierr;
926 
927   PetscFunctionBegin;
928   ierr = MatCreate(comm,A);CHKERRQ(ierr);
929   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
930   ierr = MatSetType(*A,MATSEQAIJMKL);CHKERRQ(ierr);
931   ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);CHKERRQ(ierr);
932   PetscFunctionReturn(0);
933 }
934 
935 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
936 {
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
941   ierr = MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944