xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 4d6dccb51e9e03a57f5f8e913b0c0d1a5b0a506e)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
30 {
31   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   PetscErrorCode ierr;
34   Mat            B       = *newmat;
35   Mat_SeqBAIJMKL  *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
45   B->ops->destroy          = MatDestroy_SeqBAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48   B->ops->scale            = MatScale_SeqBAIJ;
49   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
50   B->ops->axpy             = MatAXPY_SeqBAIJ;
51 
52   switch (A->rmap->bs) {
53     case 1:
54       B->ops->mult    = MatMult_SeqBAIJ_1;
55       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56       break;
57     case 2:
58       B->ops->mult    = MatMult_SeqBAIJ_2;
59       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60       break;
61     case 3:
62       B->ops->mult    = MatMult_SeqBAIJ_3;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64       break;
65     case 4:
66       B->ops->mult    = MatMult_SeqBAIJ_4;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68       break;
69     case 5:
70       B->ops->mult    = MatMult_SeqBAIJ_5;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72       break;
73     case 6:
74       B->ops->mult    = MatMult_SeqBAIJ_6;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76       break;
77     case 7:
78       B->ops->mult    = MatMult_SeqBAIJ_7;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80       break;
81     case 11:
82       B->ops->mult    = MatMult_SeqBAIJ_11;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84       break;
85     case 15:
86       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88       break;
89     default:
90       B->ops->mult    = MatMult_SeqBAIJ_N;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92       break;
93   }
94   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
95 
96   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97    * simply involves destroying the MKL sparse matrix handle and then freeing
98    * the spptr pointer. */
99   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
100 
101   if (baijmkl->sparse_optimized) {
102     sparse_status_t stat;
103     stat = mkl_sparse_destroy(baijmkl->bsrA);
104     if (stat != SPARSE_STATUS_SUCCESS) {
105       PetscFunctionReturn(PETSC_ERR_LIB);
106     }
107   }
108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
109    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
110    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
111 #endif
112   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
113 
114   /* Change the type of B to MATSEQBAIJ. */
115   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
116 
117   *newmat = B;
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
122 {
123   PetscErrorCode ierr;
124   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
125 
126   PetscFunctionBegin;
127 
128   if (baijmkl) {
129     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
130     if (baijmkl->sparse_optimized) {
131       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
132       stat = mkl_sparse_destroy(baijmkl->bsrA);
133       if (stat != SPARSE_STATUS_SUCCESS) {
134         PetscFunctionReturn(PETSC_ERR_LIB);
135       }
136     }
137 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
138    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
139    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
140 #endif
141     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
142   }
143 
144   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
145    * to destroy everything that remains. */
146   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
147   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
152 {
153   PetscErrorCode ierr;
154   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
155   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
156   PetscInt        mbs, nbs, nz, bs, i;
157   MatScalar       *aa;
158   PetscInt        *aj,*ai;
159   sparse_status_t stat;
160 
161   PetscFunctionBegin;
162   if (baijmkl->sparse_optimized) {
163     /* Matrix has been previously assembled and optimized. Must destroy old
164      * matrix handle before running the optimization step again. */
165 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
166     ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
167     ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
168 #endif
169     stat = mkl_sparse_destroy(baijmkl->bsrA);
170     if (stat != SPARSE_STATUS_SUCCESS) {
171       PetscFunctionReturn(PETSC_ERR_LIB);
172     }
173   }
174   baijmkl->sparse_optimized = PETSC_FALSE;
175 
176   /* Now perform the SpMV2 setup and matrix optimization. */
177   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
178   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
179   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
180   mbs = a->mbs;
181   nbs = a->nbs;
182   nz  = a->nz;
183   bs  = A->rmap->bs;
184   aa  = a->a;
185 
186   if (nz) {
187     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
188      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
189 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
190     aj   = a->j;
191     ai   = a->i;
192     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
193 #else
194     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
195     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
196     for (i=0;i<mbs+1;i++)
197 	ai[i]=a->i[i]+1;
198     for (i=0;i<nz;i++)
199 	aj[i]=a->j[i]+1;
200     aa   = a->a;
201     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
202     baijmkl->ai1 = ai;
203     baijmkl->aj1 = aj;
204 #endif
205     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);
206     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);
207     stat = mkl_sparse_optimize(baijmkl->bsrA);
208     if (stat != SPARSE_STATUS_SUCCESS) {
209       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
210       PetscFunctionReturn(PETSC_ERR_LIB);
211     }
212     baijmkl->sparse_optimized = PETSC_TRUE;
213   }
214 
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
219 {
220   PetscErrorCode ierr;
221   Mat_SeqBAIJMKL *baijmkl;
222   Mat_SeqBAIJMKL *baijmkl_dest;
223 
224   PetscFunctionBegin;
225   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
226   baijmkl      = (Mat_SeqBAIJMKL*) A->spptr;
227   baijmkl_dest = (Mat_SeqBAIJMKL*) (*M)->spptr;
228   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
229   baijmkl_dest->sparse_optimized = PETSC_FALSE;
230   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
235 {
236   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
237   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
238   const PetscScalar  *x;
239   PetscScalar        *y;
240   PetscErrorCode     ierr;
241   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
242 
243   PetscFunctionBegin;
244 
245   /* If there are no nonzero entries, zero yy and return immediately. */
246   if(!a->nz) {
247     PetscInt i;
248     PetscInt m=a->mbs*A->rmap->bs;
249     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
250     for (i=0; i<m; i++) {
251       y[i] = 0.0;
252     }
253     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
254     PetscFunctionReturn(0);
255   }
256 
257   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
258   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
259 
260   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
261    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
262    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
263   if (!baijmkl->sparse_optimized) {
264     MatSeqBAIJMKL_create_mkl_handle(A);
265   }
266 
267   /* Call MKL SpMV2 executor routine to do the MatMult. */
268   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
269 
270   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
271   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
272   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
273   if (stat != SPARSE_STATUS_SUCCESS) {
274     PetscFunctionReturn(PETSC_ERR_LIB);
275   }
276   PetscFunctionReturn(0);
277 }
278 
279 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
280 {
281   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
282   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
283   const PetscScalar *x;
284   PetscScalar       *y;
285   PetscErrorCode    ierr;
286   sparse_status_t   stat;
287 
288   PetscFunctionBegin;
289 
290   /* If there are no nonzero entries, zero yy and return immediately. */
291   if(!a->nz) {
292     PetscInt i;
293     PetscInt n=a->nbs;
294     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
295     for (i=0; i<n; i++) {
296       y[i] = 0.0;
297     }
298     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
299     PetscFunctionReturn(0);
300   }
301 
302   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
303   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
304 
305   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
306    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
307    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
308   if (!baijmkl->sparse_optimized) {
309     MatSeqBAIJMKL_create_mkl_handle(A);
310   }
311 
312   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
313   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
314 
315   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
316   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
317   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
318   if (stat != SPARSE_STATUS_SUCCESS) {
319     PetscFunctionReturn(PETSC_ERR_LIB);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
328   const PetscScalar  *x;
329   PetscScalar        *y,*z;
330   PetscErrorCode     ierr;
331   PetscInt           m=a->mbs*A->rmap->bs;
332   PetscInt           i;
333 
334   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
335 
336   PetscFunctionBegin;
337 
338   /* If there are no nonzero entries, set zz = yy and return immediately. */
339   if(!a->nz) {
340     PetscInt i;
341     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
342     for (i=0; i<m; i++) {
343       z[i] = y[i];
344     }
345     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
346     PetscFunctionReturn(0);
347   }
348 
349   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
350   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
351 
352   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
353    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
354    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
355   if (!baijmkl->sparse_optimized) {
356     MatSeqBAIJMKL_create_mkl_handle(A);
357   }
358 
359   /* Call MKL sparse BLAS routine to do the MatMult. */
360   if (zz == yy) {
361     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
362      * with alpha and beta both set to 1.0. */
363     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
364   } else {
365     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
366      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
367     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
368     for (i=0; i<m; i++) {
369       z[i] += y[i];
370     }
371   }
372 
373   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
374   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
375   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
376   if (stat != SPARSE_STATUS_SUCCESS) {
377     PetscFunctionReturn(PETSC_ERR_LIB);
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
383 {
384   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
385   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
386   const PetscScalar *x;
387   PetscScalar       *y,*z;
388   PetscErrorCode    ierr;
389   PetscInt          n=a->nbs*A->rmap->bs;
390   PetscInt          i;
391 
392   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
393   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
394 
395   PetscFunctionBegin;
396 
397   /* If there are no nonzero entries, set zz = yy and return immediately. */
398   if(!a->nz) {
399     PetscInt i;
400     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
401     for (i=0; i<n; i++) {
402       z[i] = y[i];
403     }
404     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
405     PetscFunctionReturn(0);
406   }
407 
408   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
409   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
410 
411   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
412    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
413    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
414   if (!baijmkl->sparse_optimized) {
415     MatSeqBAIJMKL_create_mkl_handle(A);
416   }
417 
418   /* Call MKL sparse BLAS routine to do the MatMult. */
419   if (zz == yy) {
420     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
421      * with alpha and beta both set to 1.0. */
422     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
423   } else {
424     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
425      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
426     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
427     for (i=0; i<n; i++) {
428       z[i] += y[i];
429     }
430   }
431 
432   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
433   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
434   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
435   if (stat != SPARSE_STATUS_SUCCESS) {
436     PetscFunctionReturn(PETSC_ERR_LIB);
437   }
438   PetscFunctionReturn(0);
439 }
440 
441 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
442 {
443   PetscErrorCode ierr;
444 
445   PetscFunctionBegin;
446   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
447   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
452 {
453   PetscErrorCode ierr;
454 
455   PetscFunctionBegin;
456   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
457   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
462 {
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
467   if (str == SAME_NONZERO_PATTERN) {
468     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
469     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
474  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
475  * routine, but can also be used to convert an assembled SeqBAIJ matrix
476  * into a SeqBAIJMKL one. */
477 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
478 {
479   PetscErrorCode ierr;
480   Mat            B = *newmat;
481   Mat_SeqBAIJMKL  *baijmkl;
482   PetscBool      sametype;
483 
484   PetscFunctionBegin;
485   if (reuse == MAT_INITIAL_MATRIX) {
486     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
487   }
488 
489   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
490   if (sametype) PetscFunctionReturn(0);
491 
492   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
493   B->spptr = (void*) baijmkl;
494 
495   /* Set function pointers for methods that we inherit from BAIJ but override.
496    * We also parse some command line options below, since those determine some of the methods we point to. */
497   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
498 
499   baijmkl->sparse_optimized = PETSC_FALSE;
500 
501   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
502   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
503 
504   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
505   *newmat = B;
506   PetscFunctionReturn(0);
507 }
508 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
509 {
510   PetscErrorCode  ierr;
511   PetscFunctionBegin;
512   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
513 
514   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
515   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
516   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
517   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
518   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
519   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
520   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
521   A->ops->scale            = MatScale_SeqBAIJMKL;
522   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
523   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
524   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
525   PetscFunctionReturn(0);
526 }
527 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
528 /*@C
529    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
530    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
531    routines from Intel MKL whenever possible.
532    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
533    operations are currently supported.
534    If the installed version of MKL supports the "SpMV2" sparse
535    inspector-executor routines, then those are used by default.
536    Default PETSc kernels are used otherwise.
537 
538    Input Parameters:
539 +  comm - MPI communicator, set to PETSC_COMM_SELF
540 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
541           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
542 .  m - number of rows
543 .  n - number of columns
544 .  nz - number of nonzero blocks  per block row (same for all rows)
545 -  nnz - array containing the number of nonzero blocks in the various block rows
546          (possibly different for each block row) or NULL
547 
548 
549    Output Parameter:
550 .  A - the matrix
551 
552    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
553    MatXXXXSetPreallocation() paradgm instead of this routine directly.
554    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
555 
556    Options Database Keys:
557 .   -mat_no_unroll - uses code that does not unroll the loops in the
558                      block calculations (much slower)
559 .    -mat_block_size - size of the blocks to use
560 
561    Level: intermediate
562 
563    Notes:
564    The number of rows and columns must be divisible by blocksize.
565 
566    If the nnz parameter is given then the nz parameter is ignored
567 
568    A nonzero block is any block that as 1 or more nonzeros in it
569 
570    The block AIJ format is fully compatible with standard Fortran 77
571    storage.  That is, the stored row and column indices can begin at
572    either one (as in Fortran) or zero.  See the users' manual for details.
573 
574    Specify the preallocated storage with either nz or nnz (not both).
575    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
576    allocation.  See Users-Manual: ch_mat for details.
577    matrices.
578 
579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
580 
581 @*/
582 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
583 {
584   PetscErrorCode ierr;
585 
586   PetscFunctionBegin;
587   ierr = MatCreate(comm,A);CHKERRQ(ierr);
588   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
589 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
590   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
591   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
592 #else
593   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
594   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
595   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
596 #endif
597   PetscFunctionReturn(0);
598 }
599 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
600 {
601   PetscErrorCode ierr;
602 
603   PetscFunctionBegin;
604   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
605 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
606   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
607 #else
608   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
609 #endif
610   PetscFunctionReturn(0);
611 }
612