xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 37eeb8152ec6a2cf24186d3591c2c5de5dfd8fa5)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
30 {
31   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   PetscErrorCode ierr;
34   Mat            B        = *newmat;
35   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
45   B->ops->destroy          = MatDestroy_SeqBAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48   B->ops->scale            = MatScale_SeqBAIJ;
49   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
50   B->ops->axpy             = MatAXPY_SeqBAIJ;
51 
52   switch (A->rmap->bs) {
53     case 1:
54       B->ops->mult    = MatMult_SeqBAIJ_1;
55       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56       break;
57     case 2:
58       B->ops->mult    = MatMult_SeqBAIJ_2;
59       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60       break;
61     case 3:
62       B->ops->mult    = MatMult_SeqBAIJ_3;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64       break;
65     case 4:
66       B->ops->mult    = MatMult_SeqBAIJ_4;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68       break;
69     case 5:
70       B->ops->mult    = MatMult_SeqBAIJ_5;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72       break;
73     case 6:
74       B->ops->mult    = MatMult_SeqBAIJ_6;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76       break;
77     case 7:
78       B->ops->mult    = MatMult_SeqBAIJ_7;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80       break;
81     case 11:
82       B->ops->mult    = MatMult_SeqBAIJ_11;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84       break;
85     case 15:
86       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88       break;
89     default:
90       B->ops->mult    = MatMult_SeqBAIJ_N;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92       break;
93   }
94   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
95 
96   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97    * simply involves destroying the MKL sparse matrix handle and then freeing
98    * the spptr pointer. */
99   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
100 
101   if (baijmkl->sparse_optimized) {
102     sparse_status_t stat;
103     stat = mkl_sparse_destroy(baijmkl->bsrA);
104     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
105   }
106 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
107    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
108    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
109 #endif
110   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
111 
112   /* Change the type of B to MATSEQBAIJ. */
113   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
114 
115   *newmat = B;
116   PetscFunctionReturn(0);
117 }
118 
119 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
120 {
121   PetscErrorCode ierr;
122   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123 
124   PetscFunctionBegin;
125   if (baijmkl) {
126     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127     if (baijmkl->sparse_optimized) {
128       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
129       stat = mkl_sparse_destroy(baijmkl->bsrA);
130       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
131     }
132 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
133    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
134    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
135 #endif
136     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
137   }
138 
139   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
140    * to destroy everything that remains. */
141   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
142   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
147 {
148   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
149   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
150   PetscInt        mbs, nbs, nz, bs;
151   MatScalar       *aa;
152   PetscInt        *aj,*ai;
153   sparse_status_t stat;
154 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
155   PetscErrorCode  ierr;
156   PetscInt        i;
157 #endif
158 
159   PetscFunctionBegin;
160   if (baijmkl->sparse_optimized) {
161     /* Matrix has been previously assembled and optimized. Must destroy old
162      * matrix handle before running the optimization step again. */
163 #if !defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
164     ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
165     ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
166 #endif
167     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
168   }
169   baijmkl->sparse_optimized = PETSC_FALSE;
170 
171   /* Now perform the SpMV2 setup and matrix optimization. */
172   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
173   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
174   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
175   mbs = a->mbs;
176   nbs = a->nbs;
177   nz  = a->nz;
178   bs  = A->rmap->bs;
179   aa  = a->a;
180 
181   if ((nz!=0) & !(A->structure_only)) {
182     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
183      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184 #if defined(PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED)
185     aj   = a->j;
186     ai   = a->i;
187     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
188 #else
189     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
190     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
191     for (i=0;i<mbs+1;i++)
192       ai[i] = a->i[i]+1;
193     for (i=0;i<nz;i++)
194       aj[i] = a->j[i]+1;
195     aa   = a->a;
196     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
197     baijmkl->ai1 = ai;
198     baijmkl->aj1 = aj;
199 #endif
200     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
201     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
202     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
203     baijmkl->sparse_optimized = PETSC_TRUE;
204   }
205   PetscFunctionReturn(0);
206 }
207 
208 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
209 {
210   PetscErrorCode ierr;
211   Mat_SeqBAIJMKL *baijmkl;
212   Mat_SeqBAIJMKL *baijmkl_dest;
213 
214   PetscFunctionBegin;
215   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
216   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
217   ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr);
218   (*M)->spptr = (void*)baijmkl_dest;
219   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
220   baijmkl_dest->sparse_optimized = PETSC_FALSE;
221   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
226 {
227   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
228   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
229   const PetscScalar  *x;
230   PetscScalar        *y;
231   PetscErrorCode     ierr;
232   sparse_status_t    stat = SPARSE_STATUS_SUCCESS;
233 
234   PetscFunctionBegin;
235   /* If there are no nonzero entries, zero yy and return immediately. */
236   if(!a->nz) {
237     PetscInt i;
238     PetscInt m=a->mbs*A->rmap->bs;
239     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
240     for (i=0; i<m; i++) {
241       y[i] = 0.0;
242     }
243     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
244     PetscFunctionReturn(0);
245   }
246 
247   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
248   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
249 
250   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
251    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
252    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
253   if (!baijmkl->sparse_optimized) {
254     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
255   }
256 
257   /* Call MKL SpMV2 executor routine to do the MatMult. */
258   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
259 
260   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
261   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
262   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
267 {
268   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
269   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
270   const PetscScalar *x;
271   PetscScalar       *y;
272   PetscErrorCode    ierr;
273   sparse_status_t   stat;
274 
275   PetscFunctionBegin;
276   /* If there are no nonzero entries, zero yy and return immediately. */
277   if(!a->nz) {
278     PetscInt i;
279     PetscInt n=a->nbs;
280     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
281     for (i=0; i<n; i++) {
282       y[i] = 0.0;
283     }
284     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
285     PetscFunctionReturn(0);
286   }
287 
288   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
289   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
290 
291   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
292    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
293    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
294   if (!baijmkl->sparse_optimized) {
295     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
296   }
297 
298   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
299   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
300 
301   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
303   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
308 {
309   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
310   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
311   const PetscScalar  *x;
312   PetscScalar        *y,*z;
313   PetscErrorCode     ierr;
314   PetscInt           m=a->mbs*A->rmap->bs;
315   PetscInt           i;
316 
317   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
318 
319   PetscFunctionBegin;
320   /* If there are no nonzero entries, set zz = yy and return immediately. */
321   if(!a->nz) {
322     PetscInt i;
323     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
324     for (i=0; i<m; i++) {
325       z[i] = y[i];
326     }
327     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
328     PetscFunctionReturn(0);
329   }
330 
331   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
332   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
333 
334   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
335    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
336    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
337   if (!baijmkl->sparse_optimized) {
338     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
339   }
340 
341   /* Call MKL sparse BLAS routine to do the MatMult. */
342   if (zz == yy) {
343     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
344      * with alpha and beta both set to 1.0. */
345     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
346   } else {
347     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
348      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
349     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
350     for (i=0; i<m; i++) {
351       z[i] += y[i];
352     }
353   }
354 
355   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
356   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
357   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
358   PetscFunctionReturn(0);
359 }
360 
361 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
362 {
363   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
364   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
365   const PetscScalar *x;
366   PetscScalar       *y,*z;
367   PetscErrorCode    ierr;
368   PetscInt          n=a->nbs*A->rmap->bs;
369   PetscInt          i;
370   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
371   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
372 
373   PetscFunctionBegin;
374   /* If there are no nonzero entries, set zz = yy and return immediately. */
375   if(!a->nz) {
376     PetscInt i;
377     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
378     for (i=0; i<n; i++) {
379       z[i] = y[i];
380     }
381     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
382     PetscFunctionReturn(0);
383   }
384 
385   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
386   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
387 
388   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
389    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
390    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
391   if (!baijmkl->sparse_optimized) {
392     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
393   }
394 
395   /* Call MKL sparse BLAS routine to do the MatMult. */
396   if (zz == yy) {
397     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
398      * with alpha and beta both set to 1.0. */
399     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
400   } else {
401     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
402      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
403     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
404     for (i=0; i<n; i++) {
405       z[i] += y[i];
406     }
407   }
408 
409   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
410   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
411   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
416 {
417   PetscErrorCode ierr;
418 
419   PetscFunctionBegin;
420   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
421   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
426 {
427   PetscErrorCode ierr;
428 
429   PetscFunctionBegin;
430   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
431   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
436 {
437   PetscErrorCode ierr;
438 
439   PetscFunctionBegin;
440   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
441   if (str == SAME_NONZERO_PATTERN) {
442     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
443     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
444   }
445   PetscFunctionReturn(0);
446 }
447 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
448  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
449  * routine, but can also be used to convert an assembled SeqBAIJ matrix
450  * into a SeqBAIJMKL one. */
451 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
452 {
453   PetscErrorCode ierr;
454   Mat            B = *newmat;
455   Mat_SeqBAIJMKL *baijmkl;
456   PetscBool      sametype;
457 
458   PetscFunctionBegin;
459   if (reuse == MAT_INITIAL_MATRIX) {
460     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
461   }
462 
463   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
464   if (sametype) PetscFunctionReturn(0);
465 
466   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
467   B->spptr = (void*)baijmkl;
468 
469   /* Set function pointers for methods that we inherit from BAIJ but override.
470    * We also parse some command line options below, since those determine some of the methods we point to. */
471   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
472 
473   baijmkl->sparse_optimized = PETSC_FALSE;
474 
475   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
476   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
477 
478   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
479   *newmat = B;
480   PetscFunctionReturn(0);
481 }
482 
483 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
484 {
485   PetscErrorCode  ierr;
486 
487   PetscFunctionBegin;
488   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
489   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
490   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
491   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
492   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
493   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
494   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
495   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
496   A->ops->scale            = MatScale_SeqBAIJMKL;
497   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
498   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
499   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
500   PetscFunctionReturn(0);
501 }
502 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
503 
504 /*@C
505    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
506    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
507    routines from Intel MKL whenever possible.
508    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
509    operations are currently supported.
510    If the installed version of MKL supports the "SpMV2" sparse
511    inspector-executor routines, then those are used by default.
512    Default PETSc kernels are used otherwise.
513 
514    Input Parameters:
515 +  comm - MPI communicator, set to PETSC_COMM_SELF
516 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
517           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
518 .  m - number of rows
519 .  n - number of columns
520 .  nz - number of nonzero blocks  per block row (same for all rows)
521 -  nnz - array containing the number of nonzero blocks in the various block rows
522          (possibly different for each block row) or NULL
523 
524 
525    Output Parameter:
526 .  A - the matrix
527 
528    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
529    MatXXXXSetPreallocation() paradigm instead of this routine directly.
530    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
531 
532    Options Database Keys:
533 .   -mat_no_unroll - uses code that does not unroll the loops in the
534                      block calculations (much slower)
535 .    -mat_block_size - size of the blocks to use
536 
537    Level: intermediate
538 
539    Notes:
540    The number of rows and columns must be divisible by blocksize.
541 
542    If the nnz parameter is given then the nz parameter is ignored
543 
544    A nonzero block is any block that as 1 or more nonzeros in it
545 
546    The block AIJ format is fully compatible with standard Fortran 77
547    storage.  That is, the stored row and column indices can begin at
548    either one (as in Fortran) or zero.  See the users' manual for details.
549 
550    Specify the preallocated storage with either nz or nnz (not both).
551    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
552    allocation.  See Users-Manual: ch_mat for details.
553    matrices.
554 
555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
556 
557 @*/
558 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
559 {
560   PetscErrorCode ierr;
561 
562   PetscFunctionBegin;
563   ierr = MatCreate(comm,A);CHKERRQ(ierr);
564   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
565 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
566   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
567   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
568 #else
569   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
570   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
571   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
572 #endif
573   PetscFunctionReturn(0);
574 }
575 
576 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
577 {
578   PetscErrorCode ierr;
579 
580   PetscFunctionBegin;
581   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
582 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
583   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
584 #else
585   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
586 #endif
587   PetscFunctionReturn(0);
588 }
589