xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision e7f46db8d62cb2e4e59111bf21061e64ea60daab)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
30 {
31   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   PetscErrorCode ierr;
34   Mat            B        = *newmat;
35   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
45   B->ops->destroy          = MatDestroy_SeqBAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48   B->ops->scale            = MatScale_SeqBAIJ;
49   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
50   B->ops->axpy             = MatAXPY_SeqBAIJ;
51 
52   switch (A->rmap->bs) {
53     case 1:
54       B->ops->mult    = MatMult_SeqBAIJ_1;
55       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56       break;
57     case 2:
58       B->ops->mult    = MatMult_SeqBAIJ_2;
59       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60       break;
61     case 3:
62       B->ops->mult    = MatMult_SeqBAIJ_3;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64       break;
65     case 4:
66       B->ops->mult    = MatMult_SeqBAIJ_4;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68       break;
69     case 5:
70       B->ops->mult    = MatMult_SeqBAIJ_5;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72       break;
73     case 6:
74       B->ops->mult    = MatMult_SeqBAIJ_6;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76       break;
77     case 7:
78       B->ops->mult    = MatMult_SeqBAIJ_7;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80       break;
81     case 11:
82       B->ops->mult    = MatMult_SeqBAIJ_11;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84       break;
85     case 15:
86       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88       break;
89     default:
90       B->ops->mult    = MatMult_SeqBAIJ_N;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92       break;
93   }
94   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
95 
96   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97    * simply involves destroying the MKL sparse matrix handle and then freeing
98    * the spptr pointer. */
99   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
100 
101   if (baijmkl->sparse_optimized) {
102     sparse_status_t stat;
103     stat = mkl_sparse_destroy(baijmkl->bsrA);
104     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
105   }
106 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
107    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
108    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
109 #endif
110   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
111 
112   /* Change the type of B to MATSEQBAIJ. */
113   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
114 
115   *newmat = B;
116   PetscFunctionReturn(0);
117 }
118 
119 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
120 {
121   PetscErrorCode ierr;
122   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123 
124   PetscFunctionBegin;
125   if (baijmkl) {
126     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127     if (baijmkl->sparse_optimized) {
128       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
129       stat = mkl_sparse_destroy(baijmkl->bsrA);
130       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
131     }
132 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
133    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
134    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
135 #endif
136     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
137   }
138 
139   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
140    * to destroy everything that remains. */
141   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
142   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
147 {
148   PetscErrorCode ierr;
149   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
150   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
151   PetscInt        mbs, nbs, nz, bs, i;
152   MatScalar       *aa;
153   PetscInt        *aj,*ai;
154   sparse_status_t stat;
155 
156   PetscFunctionBegin;
157   if (baijmkl->sparse_optimized) {
158     /* Matrix has been previously assembled and optimized. Must destroy old
159      * matrix handle before running the optimization step again. */
160 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
161     ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
162     ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
163 #endif
164     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
165   }
166   baijmkl->sparse_optimized = PETSC_FALSE;
167 
168   /* Now perform the SpMV2 setup and matrix optimization. */
169   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
170   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
171   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
172   mbs = a->mbs;
173   nbs = a->nbs;
174   nz  = a->nz;
175   bs  = A->rmap->bs;
176   aa  = a->a;
177 
178   if ((nz!=0) & !(A->structure_only)) {
179     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
180      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
181 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
182     aj   = a->j;
183     ai   = a->i;
184     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
185 #else
186     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
187     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
188     for (i=0;i<mbs+1;i++)
189       ai[i] = a->i[i]+1;
190     for (i=0;i<nz;i++)
191       aj[i] = a->j[i]+1;
192     aa   = a->a;
193     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
194     baijmkl->ai1 = ai;
195     baijmkl->aj1 = aj;
196 #endif
197     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
198     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
199     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
200     baijmkl->sparse_optimized = PETSC_TRUE;
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
206 {
207   PetscErrorCode ierr;
208   Mat_SeqBAIJMKL *baijmkl;
209   Mat_SeqBAIJMKL *baijmkl_dest;
210 
211   PetscFunctionBegin;
212   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
213   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
214   ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr);
215   (*M)->spptr = (void*)baijmkl_dest;
216   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
217   baijmkl_dest->sparse_optimized = PETSC_FALSE;
218   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
219   PetscFunctionReturn(0);
220 }
221 
222 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
223 {
224   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
225   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
226   const PetscScalar  *x;
227   PetscScalar        *y;
228   PetscErrorCode     ierr;
229   sparse_status_t    stat = SPARSE_STATUS_SUCCESS;
230 
231   PetscFunctionBegin;
232   /* If there are no nonzero entries, zero yy and return immediately. */
233   if(!a->nz) {
234     PetscInt i;
235     PetscInt m=a->mbs*A->rmap->bs;
236     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
237     for (i=0; i<m; i++) {
238       y[i] = 0.0;
239     }
240     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
241     PetscFunctionReturn(0);
242   }
243 
244   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
245   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
246 
247   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
248    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
249    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
250   if (!baijmkl->sparse_optimized) {
251     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
252   }
253 
254   /* Call MKL SpMV2 executor routine to do the MatMult. */
255   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
256 
257   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
258   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
259   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
264 {
265   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
266   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
267   const PetscScalar *x;
268   PetscScalar       *y;
269   PetscErrorCode    ierr;
270   sparse_status_t   stat;
271 
272   PetscFunctionBegin;
273   /* If there are no nonzero entries, zero yy and return immediately. */
274   if(!a->nz) {
275     PetscInt i;
276     PetscInt n=a->nbs;
277     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
278     for (i=0; i<n; i++) {
279       y[i] = 0.0;
280     }
281     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
282     PetscFunctionReturn(0);
283   }
284 
285   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
286   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
287 
288   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
289    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
290    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
291   if (!baijmkl->sparse_optimized) {
292     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
293   }
294 
295   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
296   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
297 
298   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
299   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
300   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
305 {
306   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
307   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
308   const PetscScalar  *x;
309   PetscScalar        *y,*z;
310   PetscErrorCode     ierr;
311   PetscInt           m=a->mbs*A->rmap->bs;
312   PetscInt           i;
313 
314   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
315 
316   PetscFunctionBegin;
317   /* If there are no nonzero entries, set zz = yy and return immediately. */
318   if(!a->nz) {
319     PetscInt i;
320     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
321     for (i=0; i<m; i++) {
322       z[i] = y[i];
323     }
324     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
325     PetscFunctionReturn(0);
326   }
327 
328   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
329   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
330 
331   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
332    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
333    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
334   if (!baijmkl->sparse_optimized) {
335     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
336   }
337 
338   /* Call MKL sparse BLAS routine to do the MatMult. */
339   if (zz == yy) {
340     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
341      * with alpha and beta both set to 1.0. */
342     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
343   } else {
344     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
345      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
346     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
347     for (i=0; i<m; i++) {
348       z[i] += y[i];
349     }
350   }
351 
352   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
353   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
354   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
359 {
360   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
361   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
362   const PetscScalar *x;
363   PetscScalar       *y,*z;
364   PetscErrorCode    ierr;
365   PetscInt          n=a->nbs*A->rmap->bs;
366   PetscInt          i;
367   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
368   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
369 
370   PetscFunctionBegin;
371   /* If there are no nonzero entries, set zz = yy and return immediately. */
372   if(!a->nz) {
373     PetscInt i;
374     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
375     for (i=0; i<n; i++) {
376       z[i] = y[i];
377     }
378     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
379     PetscFunctionReturn(0);
380   }
381 
382   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
383   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
384 
385   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
386    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
387    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
388   if (!baijmkl->sparse_optimized) {
389     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
390   }
391 
392   /* Call MKL sparse BLAS routine to do the MatMult. */
393   if (zz == yy) {
394     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
395      * with alpha and beta both set to 1.0. */
396     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
397   } else {
398     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
399      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
400     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
401     for (i=0; i<n; i++) {
402       z[i] += y[i];
403     }
404   }
405 
406   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
407   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
408   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
413 {
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
418   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
419   PetscFunctionReturn(0);
420 }
421 
422 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
423 {
424   PetscErrorCode ierr;
425 
426   PetscFunctionBegin;
427   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
428   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
433 {
434   PetscErrorCode ierr;
435 
436   PetscFunctionBegin;
437   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
438   if (str == SAME_NONZERO_PATTERN) {
439     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
440     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
441   }
442   PetscFunctionReturn(0);
443 }
444 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
445  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
446  * routine, but can also be used to convert an assembled SeqBAIJ matrix
447  * into a SeqBAIJMKL one. */
448 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
449 {
450   PetscErrorCode ierr;
451   Mat            B = *newmat;
452   Mat_SeqBAIJMKL *baijmkl;
453   PetscBool      sametype;
454 
455   PetscFunctionBegin;
456   if (reuse == MAT_INITIAL_MATRIX) {
457     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
458   }
459 
460   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
461   if (sametype) PetscFunctionReturn(0);
462 
463   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
464   B->spptr = (void*)baijmkl;
465 
466   /* Set function pointers for methods that we inherit from BAIJ but override.
467    * We also parse some command line options below, since those determine some of the methods we point to. */
468   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
469 
470   baijmkl->sparse_optimized = PETSC_FALSE;
471 
472   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
473   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
474 
475   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
476   *newmat = B;
477   PetscFunctionReturn(0);
478 }
479 
480 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
481 {
482   PetscErrorCode  ierr;
483 
484   PetscFunctionBegin;
485   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
486   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
487   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
488   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
489   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
490   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
491   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
492   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
493   A->ops->scale            = MatScale_SeqBAIJMKL;
494   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
495   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
496   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
497   PetscFunctionReturn(0);
498 }
499 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
500 
501 /*@C
502    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
503    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
504    routines from Intel MKL whenever possible.
505    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
506    operations are currently supported.
507    If the installed version of MKL supports the "SpMV2" sparse
508    inspector-executor routines, then those are used by default.
509    Default PETSc kernels are used otherwise.
510 
511    Input Parameters:
512 +  comm - MPI communicator, set to PETSC_COMM_SELF
513 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
514           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
515 .  m - number of rows
516 .  n - number of columns
517 .  nz - number of nonzero blocks  per block row (same for all rows)
518 -  nnz - array containing the number of nonzero blocks in the various block rows
519          (possibly different for each block row) or NULL
520 
521 
522    Output Parameter:
523 .  A - the matrix
524 
525    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
526    MatXXXXSetPreallocation() paradgm instead of this routine directly.
527    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
528 
529    Options Database Keys:
530 .   -mat_no_unroll - uses code that does not unroll the loops in the
531                      block calculations (much slower)
532 .    -mat_block_size - size of the blocks to use
533 
534    Level: intermediate
535 
536    Notes:
537    The number of rows and columns must be divisible by blocksize.
538 
539    If the nnz parameter is given then the nz parameter is ignored
540 
541    A nonzero block is any block that as 1 or more nonzeros in it
542 
543    The block AIJ format is fully compatible with standard Fortran 77
544    storage.  That is, the stored row and column indices can begin at
545    either one (as in Fortran) or zero.  See the users' manual for details.
546 
547    Specify the preallocated storage with either nz or nnz (not both).
548    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
549    allocation.  See Users-Manual: ch_mat for details.
550    matrices.
551 
552 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
553 
554 @*/
555 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
556 {
557   PetscErrorCode ierr;
558 
559   PetscFunctionBegin;
560   ierr = MatCreate(comm,A);CHKERRQ(ierr);
561   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
562 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
563   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
564   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
565 #else
566   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
567   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
568   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
569 #endif
570   PetscFunctionReturn(0);
571 }
572 
573 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
574 {
575   PetscErrorCode ierr;
576 
577   PetscFunctionBegin;
578   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
579 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
580   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
581 #else
582   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
583 #endif
584   PetscFunctionReturn(0);
585 }
586