xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision b5b72c8a3e9c2e7ea2cb9716c0e8b3bdf5d569e9)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
27 
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29 {
30   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
31   /* so we will ignore 'MatType type'. */
32   PetscErrorCode ierr;
33   Mat            B       = *newmat;
34   Mat_SeqBAIJMKL  *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
35 
36   PetscFunctionBegin;
37   if (reuse == MAT_INITIAL_MATRIX) {
38     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
39   }
40 
41   /* Reset the original function pointers. */
42   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
43   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
44   B->ops->destroy          = MatDestroy_SeqBAIJ;
45   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
46   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
47   B->ops->scale            = MatScale_SeqBAIJ;
48   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
49   B->ops->axpy             = MatAXPY_SeqBAIJ;
50 
51   switch (A->rmap->bs) {
52     case 1:
53       B->ops->mult    = MatMult_SeqBAIJ_1;
54       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
55       break;
56     case 2:
57       B->ops->mult    = MatMult_SeqBAIJ_2;
58       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
59       break;
60     case 3:
61       B->ops->mult    = MatMult_SeqBAIJ_3;
62       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
63       break;
64     case 4:
65       B->ops->mult    = MatMult_SeqBAIJ_4;
66       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
67       break;
68     case 5:
69       B->ops->mult    = MatMult_SeqBAIJ_5;
70       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
71       break;
72     case 6:
73       B->ops->mult    = MatMult_SeqBAIJ_6;
74       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
75       break;
76     case 7:
77       B->ops->mult    = MatMult_SeqBAIJ_7;
78       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
79       break;
80     case 11:
81       B->ops->mult    = MatMult_SeqBAIJ_11;
82       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
83       break;
84     case 15:
85       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
86       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
87       break;
88     default:
89       B->ops->mult    = MatMult_SeqBAIJ_N;
90       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
91       break;
92   }
93   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
94 
95   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
96    * simply involves destroying the MKL sparse matrix handle and then freeing
97    * the spptr pointer. */
98   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
99 
100   if (baijmkl->sparse_optimized) {
101     sparse_status_t stat;
102     stat = mkl_sparse_destroy(baijmkl->bsrA);
103     if (stat != SPARSE_STATUS_SUCCESS) {
104       PetscFunctionReturn(PETSC_ERR_LIB);
105     }
106   }
107 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
108    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
109    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
110 #endif
111   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
112 
113   /* Change the type of B to MATSEQBAIJ. */
114   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
115 
116   *newmat = B;
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
121 {
122   PetscErrorCode ierr;
123   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
124 
125   PetscFunctionBegin;
126 
127   if (baijmkl) {
128     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129     if (baijmkl->sparse_optimized) {
130       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
131       stat = mkl_sparse_destroy(baijmkl->bsrA);
132       if (stat != SPARSE_STATUS_SUCCESS) {
133         PetscFunctionReturn(PETSC_ERR_LIB);
134       }
135     }
136 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
137    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
138    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
139 #endif
140     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
141   }
142 
143   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
144    * to destroy everything that remains. */
145   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
146   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
151 {
152   PetscErrorCode ierr;
153   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
154   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155   PetscInt        mbs, nbs, nz, bs, i;
156   MatScalar       *aa;
157   PetscInt        *aj,*ai;
158   sparse_status_t stat;
159 
160   PetscFunctionBegin;
161   if (baijmkl->sparse_optimized) {
162     /* Matrix has been previously assembled and optimized. Must destroy old
163      * matrix handle before running the optimization step again. */
164     stat = mkl_sparse_destroy(baijmkl->bsrA);
165     if (stat != SPARSE_STATUS_SUCCESS) {
166       PetscFunctionReturn(PETSC_ERR_LIB);
167     }
168   }
169   baijmkl->sparse_optimized = PETSC_FALSE;
170 
171   /* Now perform the SpMV2 setup and matrix optimization. */
172   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
173   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
174   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
175   mbs = a->mbs;
176   nbs = a->nbs;
177   nz  = a->nz;
178   bs  = A->rmap->bs;
179   aa  = a->a;
180 
181   if (nz) {
182     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
183      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
185     aj   = a->j;
186     ai   = a->i;
187     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
188 #else
189     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
190     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
191     for (i=0;i<mbs+1;i++)
192 	ai[i]=a->i[i]+1;
193     for (i=0;i<nz;i++)
194 	aj[i]=a->j[i]+1;
195     aa   = a->a;
196     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
197     baijmkl->ai1 = ai;
198     baijmkl->aj1 = aj;
199 #endif
200     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);
201     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);
202     stat = mkl_sparse_optimize(baijmkl->bsrA);
203     if (stat != SPARSE_STATUS_SUCCESS) {
204       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
205       PetscFunctionReturn(PETSC_ERR_LIB);
206     }
207     baijmkl->sparse_optimized = PETSC_TRUE;
208   }
209 
210   PetscFunctionReturn(0);
211 }
212 
213 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
214 {
215   PetscErrorCode ierr;
216   Mat_SeqBAIJMKL *baijmkl;
217   Mat_SeqBAIJMKL *baijmkl_dest;
218 
219   PetscFunctionBegin;
220   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
221   baijmkl      = (Mat_SeqBAIJMKL*) A->spptr;
222   baijmkl_dest = (Mat_SeqBAIJMKL*) (*M)->spptr;
223   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
224   baijmkl_dest->sparse_optimized = PETSC_FALSE;
225   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
230 {
231   PetscErrorCode  ierr;
232   PetscFunctionBegin;
233   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
234 
235   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
236   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
237 
238   PetscFunctionReturn(0);
239 }
240 
241 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
245   const PetscScalar  *x;
246   PetscScalar        *y;
247   PetscErrorCode     ierr;
248   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
249 
250   PetscFunctionBegin;
251 
252   /* If there are no nonzero entries, zero yy and return immediately. */
253   if(!a->nz) {
254     PetscInt i;
255     PetscInt m=a->mbs*A->rmap->bs;
256     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
257     for (i=0; i<m; i++) {
258       y[i] = 0.0;
259     }
260     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
261     PetscFunctionReturn(0);
262   }
263 
264   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
266 
267   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
268    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
269    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
270   if (!baijmkl->sparse_optimized) {
271     MatSeqBAIJMKL_create_mkl_handle(A);
272   }
273 
274   /* Call MKL SpMV2 executor routine to do the MatMult. */
275   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
276 
277   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
278   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
279   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
280   if (stat != SPARSE_STATUS_SUCCESS) {
281     PetscFunctionReturn(PETSC_ERR_LIB);
282   }
283   PetscFunctionReturn(0);
284 }
285 
286 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
287 {
288   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
289   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
290   const PetscScalar *x;
291   PetscScalar       *y;
292   PetscErrorCode    ierr;
293   sparse_status_t   stat;
294 
295   PetscFunctionBegin;
296 
297   /* If there are no nonzero entries, zero yy and return immediately. */
298   if(!a->nz) {
299     PetscInt i;
300     PetscInt n=a->nbs;
301     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
302     for (i=0; i<n; i++) {
303       y[i] = 0.0;
304     }
305     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
306     PetscFunctionReturn(0);
307   }
308 
309   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
310   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
311 
312   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
313    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
314    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
315   if (!baijmkl->sparse_optimized) {
316     MatSeqBAIJMKL_create_mkl_handle(A);
317   }
318 
319   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
320   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
321 
322   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
323   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
325   if (stat != SPARSE_STATUS_SUCCESS) {
326     PetscFunctionReturn(PETSC_ERR_LIB);
327   }
328   PetscFunctionReturn(0);
329 }
330 
331 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
332 {
333   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
335   const PetscScalar  *x;
336   PetscScalar        *y,*z;
337   PetscErrorCode     ierr;
338   PetscInt           m=a->mbs*A->rmap->bs;
339   PetscInt           i;
340 
341   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
342 
343   PetscFunctionBegin;
344 
345   /* If there are no nonzero entries, set zz = yy and return immediately. */
346   if(!a->nz) {
347     PetscInt i;
348     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
349     for (i=0; i<m; i++) {
350       z[i] = y[i];
351     }
352     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
353     PetscFunctionReturn(0);
354   }
355 
356   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
357   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
358 
359   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
360    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
361    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
362   if (!baijmkl->sparse_optimized) {
363     MatSeqBAIJMKL_create_mkl_handle(A);
364   }
365 
366   /* Call MKL sparse BLAS routine to do the MatMult. */
367   if (zz == yy) {
368     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
369      * with alpha and beta both set to 1.0. */
370     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
371   } else {
372     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
373      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
374     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
375     for (i=0; i<m; i++) {
376       z[i] += y[i];
377     }
378   }
379 
380   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
381   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
382   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
383   if (stat != SPARSE_STATUS_SUCCESS) {
384     PetscFunctionReturn(PETSC_ERR_LIB);
385   }
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
390 {
391   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
392   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
393   const PetscScalar *x;
394   PetscScalar       *y,*z;
395   PetscErrorCode    ierr;
396   PetscInt          n=a->nbs*A->rmap->bs;
397   PetscInt          i;
398 
399   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
400   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
401 
402   PetscFunctionBegin;
403 
404   /* If there are no nonzero entries, set zz = yy and return immediately. */
405   if(!a->nz) {
406     PetscInt i;
407     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
408     for (i=0; i<n; i++) {
409       z[i] = y[i];
410     }
411     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
412     PetscFunctionReturn(0);
413   }
414 
415   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
416   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
417 
418   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
419    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
420    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
421   if (!baijmkl->sparse_optimized) {
422     MatSeqBAIJMKL_create_mkl_handle(A);
423   }
424 
425   /* Call MKL sparse BLAS routine to do the MatMult. */
426   if (zz == yy) {
427     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
428      * with alpha and beta both set to 1.0. */
429     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
430   } else {
431     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
432      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
433     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
434     for (i=0; i<n; i++) {
435       z[i] += y[i];
436     }
437   }
438 
439   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
440   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
441   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
442   if (stat != SPARSE_STATUS_SUCCESS) {
443     PetscFunctionReturn(PETSC_ERR_LIB);
444   }
445   PetscFunctionReturn(0);
446 }
447 
448 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
449 {
450   PetscErrorCode ierr;
451 
452   PetscFunctionBegin;
453   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
454   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
459 {
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
464   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
469 {
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
474   if (str == SAME_NONZERO_PATTERN) {
475     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
476     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
477   }
478   PetscFunctionReturn(0);
479 }
480 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
481  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
482  * routine, but can also be used to convert an assembled SeqBAIJ matrix
483  * into a SeqBAIJMKL one. */
484 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
485 {
486   PetscErrorCode ierr;
487   Mat            B = *newmat;
488   Mat_SeqBAIJMKL  *baijmkl;
489   PetscBool      sametype;
490 
491   PetscFunctionBegin;
492   if (reuse == MAT_INITIAL_MATRIX) {
493     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
494   }
495 
496   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
497   if (sametype) PetscFunctionReturn(0);
498 
499   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
500   B->spptr = (void*) baijmkl;
501 
502   /* Set function pointers for methods that we inherit from BAIJ but override.
503    * We also parse some command line options below, since those determine some of the methods we point to. */
504   B->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
505   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
506   B->ops->destroy          = MatDestroy_SeqBAIJMKL;
507   B->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
508   B->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
509   B->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
510   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
511   B->ops->scale            = MatScale_SeqBAIJMKL;
512   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
513   B->ops->axpy             = MatAXPY_SeqBAIJMKL;
514 
515   baijmkl->sparse_optimized = PETSC_FALSE;
516 
517   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
518   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
519 
520   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
521   *newmat = B;
522   PetscFunctionReturn(0);
523 }
524 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
525 /*@C
526    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
527    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
528    routines from Intel MKL whenever possible.
529    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
530    operations are currently supported.
531    If the installed version of MKL supports the "SpMV2" sparse
532    inspector-executor routines, then those are used by default.
533    Default PETSc kernels are used otherwise.
534 
535    Input Parameters:
536 +  comm - MPI communicator, set to PETSC_COMM_SELF
537 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
538           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
539 .  m - number of rows
540 .  n - number of columns
541 .  nz - number of nonzero blocks  per block row (same for all rows)
542 -  nnz - array containing the number of nonzero blocks in the various block rows
543          (possibly different for each block row) or NULL
544 
545 
546    Output Parameter:
547 .  A - the matrix
548 
549    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
550    MatXXXXSetPreallocation() paradgm instead of this routine directly.
551    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
552 
553    Options Database Keys:
554 .   -mat_no_unroll - uses code that does not unroll the loops in the
555                      block calculations (much slower)
556 .    -mat_block_size - size of the blocks to use
557 
558    Level: intermediate
559 
560    Notes:
561    The number of rows and columns must be divisible by blocksize.
562 
563    If the nnz parameter is given then the nz parameter is ignored
564 
565    A nonzero block is any block that as 1 or more nonzeros in it
566 
567    The block AIJ format is fully compatible with standard Fortran 77
568    storage.  That is, the stored row and column indices can begin at
569    either one (as in Fortran) or zero.  See the users' manual for details.
570 
571    Specify the preallocated storage with either nz or nnz (not both).
572    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
573    allocation.  See Users-Manual: ch_mat for details.
574    matrices.
575 
576 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
577 
578 @*/
579 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
580 {
581   PetscErrorCode ierr;
582 
583   PetscFunctionBegin;
584   ierr = MatCreate(comm,A);CHKERRQ(ierr);
585   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
586 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
587   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
588   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
589 #else
590   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
591   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
592   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
593 #endif
594   PetscFunctionReturn(0);
595 }
596 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
597 {
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
602 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
603   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
604 #else
605   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
606 #endif
607   PetscFunctionReturn(0);
608 }
609