xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 71bc03e07a347361588709802b5935ad16030ed2)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
30 {
31   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   PetscErrorCode ierr;
34   Mat            B       = *newmat;
35   Mat_SeqBAIJMKL  *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
45   B->ops->destroy          = MatDestroy_SeqBAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48   B->ops->scale            = MatScale_SeqBAIJ;
49   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
50   B->ops->axpy             = MatAXPY_SeqBAIJ;
51 
52   switch (A->rmap->bs) {
53     case 1:
54       B->ops->mult    = MatMult_SeqBAIJ_1;
55       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56       break;
57     case 2:
58       B->ops->mult    = MatMult_SeqBAIJ_2;
59       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60       break;
61     case 3:
62       B->ops->mult    = MatMult_SeqBAIJ_3;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64       break;
65     case 4:
66       B->ops->mult    = MatMult_SeqBAIJ_4;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68       break;
69     case 5:
70       B->ops->mult    = MatMult_SeqBAIJ_5;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72       break;
73     case 6:
74       B->ops->mult    = MatMult_SeqBAIJ_6;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76       break;
77     case 7:
78       B->ops->mult    = MatMult_SeqBAIJ_7;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80       break;
81     case 11:
82       B->ops->mult    = MatMult_SeqBAIJ_11;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84       break;
85     case 15:
86       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88       break;
89     default:
90       B->ops->mult    = MatMult_SeqBAIJ_N;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92       break;
93   }
94   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
95 
96   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97    * simply involves destroying the MKL sparse matrix handle and then freeing
98    * the spptr pointer. */
99   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
100 
101   if (baijmkl->sparse_optimized) {
102     sparse_status_t stat;
103     stat = mkl_sparse_destroy(baijmkl->bsrA);
104     if (stat != SPARSE_STATUS_SUCCESS) {
105       PetscFunctionReturn(PETSC_ERR_LIB);
106     }
107   }
108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
109    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
110    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
111 #endif
112   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
113 
114   /* Change the type of B to MATSEQBAIJ. */
115   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
116 
117   *newmat = B;
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
122 {
123   PetscErrorCode ierr;
124   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
125 
126   PetscFunctionBegin;
127 
128   if (baijmkl) {
129     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
130     if (baijmkl->sparse_optimized) {
131       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
132       stat = mkl_sparse_destroy(baijmkl->bsrA);
133       if (stat != SPARSE_STATUS_SUCCESS) {
134         PetscFunctionReturn(PETSC_ERR_LIB);
135       }
136     }
137 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
138    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
139    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
140 #endif
141     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
142   }
143 
144   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
145    * to destroy everything that remains. */
146   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
147   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
148   PetscFunctionReturn(0);
149 }
150 
151 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
152 {
153   PetscErrorCode ierr;
154   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
155   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
156   PetscInt        mbs, nbs, nz, bs, i;
157   MatScalar       *aa;
158   PetscInt        *aj,*ai;
159   sparse_status_t stat;
160 
161   PetscFunctionBegin;
162   if (baijmkl->sparse_optimized) {
163     /* Matrix has been previously assembled and optimized. Must destroy old
164      * matrix handle before running the optimization step again. */
165 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
166     ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
167     ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
168 #endif
169     stat = mkl_sparse_destroy(baijmkl->bsrA);
170     if (stat != SPARSE_STATUS_SUCCESS) {
171       PetscFunctionReturn(PETSC_ERR_LIB);
172     }
173   }
174   baijmkl->sparse_optimized = PETSC_FALSE;
175 
176   /* Now perform the SpMV2 setup and matrix optimization. */
177   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
178   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
179   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
180   mbs = a->mbs;
181   nbs = a->nbs;
182   nz  = a->nz;
183   bs  = A->rmap->bs;
184   aa  = a->a;
185 
186   if (nz) {
187     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
188      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
189 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
190     aj   = a->j;
191     ai   = a->i;
192     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
193 #else
194     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
195     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
196     for (i=0;i<mbs+1;i++)
197 	ai[i]=a->i[i]+1;
198     for (i=0;i<nz;i++)
199 	aj[i]=a->j[i]+1;
200     aa   = a->a;
201     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
202     baijmkl->ai1 = ai;
203     baijmkl->aj1 = aj;
204 #endif
205     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);
206     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);
207     stat = mkl_sparse_optimize(baijmkl->bsrA);
208     if (stat != SPARSE_STATUS_SUCCESS) {
209       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
210       PetscFunctionReturn(PETSC_ERR_LIB);
211     }
212     baijmkl->sparse_optimized = PETSC_TRUE;
213   }
214 
215   PetscFunctionReturn(0);
216 }
217 
218 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
219 {
220   PetscErrorCode ierr;
221   Mat_SeqBAIJMKL *baijmkl;
222   Mat_SeqBAIJMKL *baijmkl_dest;
223 
224   PetscFunctionBegin;
225   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
226   baijmkl      = (Mat_SeqBAIJMKL*) A->spptr;
227   ierr         = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr);
228   (*M)->spptr  = (void*)baijmkl_dest;
229   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
230   baijmkl_dest->sparse_optimized = PETSC_FALSE;
231   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
232   PetscFunctionReturn(0);
233 }
234 
235 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
236 {
237   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
238   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
239   const PetscScalar  *x;
240   PetscScalar        *y;
241   PetscErrorCode     ierr;
242   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
243 
244   PetscFunctionBegin;
245 
246   /* If there are no nonzero entries, zero yy and return immediately. */
247   if(!a->nz) {
248     PetscInt i;
249     PetscInt m=a->mbs*A->rmap->bs;
250     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
251     for (i=0; i<m; i++) {
252       y[i] = 0.0;
253     }
254     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
255     PetscFunctionReturn(0);
256   }
257 
258   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
259   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
260 
261   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
262    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
263    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
264   if (!baijmkl->sparse_optimized) {
265     MatSeqBAIJMKL_create_mkl_handle(A);
266   }
267 
268   /* Call MKL SpMV2 executor routine to do the MatMult. */
269   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
270 
271   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
272   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
273   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
274   if (stat != SPARSE_STATUS_SUCCESS) {
275     PetscFunctionReturn(PETSC_ERR_LIB);
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
281 {
282   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
283   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
284   const PetscScalar *x;
285   PetscScalar       *y;
286   PetscErrorCode    ierr;
287   sparse_status_t   stat;
288 
289   PetscFunctionBegin;
290 
291   /* If there are no nonzero entries, zero yy and return immediately. */
292   if(!a->nz) {
293     PetscInt i;
294     PetscInt n=a->nbs;
295     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
296     for (i=0; i<n; i++) {
297       y[i] = 0.0;
298     }
299     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
300     PetscFunctionReturn(0);
301   }
302 
303   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
305 
306   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
307    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
308    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
309   if (!baijmkl->sparse_optimized) {
310     MatSeqBAIJMKL_create_mkl_handle(A);
311   }
312 
313   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
314   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
315 
316   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
317   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
319   if (stat != SPARSE_STATUS_SUCCESS) {
320     PetscFunctionReturn(PETSC_ERR_LIB);
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
326 {
327   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
328   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
329   const PetscScalar  *x;
330   PetscScalar        *y,*z;
331   PetscErrorCode     ierr;
332   PetscInt           m=a->mbs*A->rmap->bs;
333   PetscInt           i;
334 
335   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
336 
337   PetscFunctionBegin;
338 
339   /* If there are no nonzero entries, set zz = yy and return immediately. */
340   if(!a->nz) {
341     PetscInt i;
342     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
343     for (i=0; i<m; i++) {
344       z[i] = y[i];
345     }
346     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
347     PetscFunctionReturn(0);
348   }
349 
350   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
352 
353   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
354    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
355    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
356   if (!baijmkl->sparse_optimized) {
357     MatSeqBAIJMKL_create_mkl_handle(A);
358   }
359 
360   /* Call MKL sparse BLAS routine to do the MatMult. */
361   if (zz == yy) {
362     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
363      * with alpha and beta both set to 1.0. */
364     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
365   } else {
366     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
367      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
368     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
369     for (i=0; i<m; i++) {
370       z[i] += y[i];
371     }
372   }
373 
374   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
375   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
376   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
377   if (stat != SPARSE_STATUS_SUCCESS) {
378     PetscFunctionReturn(PETSC_ERR_LIB);
379   }
380   PetscFunctionReturn(0);
381 }
382 
383 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
384 {
385   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
386   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
387   const PetscScalar *x;
388   PetscScalar       *y,*z;
389   PetscErrorCode    ierr;
390   PetscInt          n=a->nbs*A->rmap->bs;
391   PetscInt          i;
392 
393   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
394   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
395 
396   PetscFunctionBegin;
397 
398   /* If there are no nonzero entries, set zz = yy and return immediately. */
399   if(!a->nz) {
400     PetscInt i;
401     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
402     for (i=0; i<n; i++) {
403       z[i] = y[i];
404     }
405     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
406     PetscFunctionReturn(0);
407   }
408 
409   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
410   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
411 
412   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
413    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
414    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
415   if (!baijmkl->sparse_optimized) {
416     MatSeqBAIJMKL_create_mkl_handle(A);
417   }
418 
419   /* Call MKL sparse BLAS routine to do the MatMult. */
420   if (zz == yy) {
421     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
422      * with alpha and beta both set to 1.0. */
423     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
424   } else {
425     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
426      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
427     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
428     for (i=0; i<n; i++) {
429       z[i] += y[i];
430     }
431   }
432 
433   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
434   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
435   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
436   if (stat != SPARSE_STATUS_SUCCESS) {
437     PetscFunctionReturn(PETSC_ERR_LIB);
438   }
439   PetscFunctionReturn(0);
440 }
441 
442 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
443 {
444   PetscErrorCode ierr;
445 
446   PetscFunctionBegin;
447   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
448   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
453 {
454   PetscErrorCode ierr;
455 
456   PetscFunctionBegin;
457   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
458   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
463 {
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
468   if (str == SAME_NONZERO_PATTERN) {
469     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
470     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
471   }
472   PetscFunctionReturn(0);
473 }
474 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
475  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
476  * routine, but can also be used to convert an assembled SeqBAIJ matrix
477  * into a SeqBAIJMKL one. */
478 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
479 {
480   PetscErrorCode ierr;
481   Mat            B = *newmat;
482   Mat_SeqBAIJMKL  *baijmkl;
483   PetscBool      sametype;
484 
485   PetscFunctionBegin;
486   if (reuse == MAT_INITIAL_MATRIX) {
487     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
488   }
489 
490   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
491   if (sametype) PetscFunctionReturn(0);
492 
493   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
494   B->spptr = (void*) baijmkl;
495 
496   /* Set function pointers for methods that we inherit from BAIJ but override.
497    * We also parse some command line options below, since those determine some of the methods we point to. */
498   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
499 
500   baijmkl->sparse_optimized = PETSC_FALSE;
501 
502   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
503   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
504 
505   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
506   *newmat = B;
507   PetscFunctionReturn(0);
508 }
509 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
510 {
511   PetscErrorCode  ierr;
512   PetscFunctionBegin;
513   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
514 
515   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
516   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
517   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
518   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
519   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
520   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
521   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
522   A->ops->scale            = MatScale_SeqBAIJMKL;
523   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
524   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
525   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
526   PetscFunctionReturn(0);
527 }
528 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
529 /*@C
530    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
531    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
532    routines from Intel MKL whenever possible.
533    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
534    operations are currently supported.
535    If the installed version of MKL supports the "SpMV2" sparse
536    inspector-executor routines, then those are used by default.
537    Default PETSc kernels are used otherwise.
538 
539    Input Parameters:
540 +  comm - MPI communicator, set to PETSC_COMM_SELF
541 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
542           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
543 .  m - number of rows
544 .  n - number of columns
545 .  nz - number of nonzero blocks  per block row (same for all rows)
546 -  nnz - array containing the number of nonzero blocks in the various block rows
547          (possibly different for each block row) or NULL
548 
549 
550    Output Parameter:
551 .  A - the matrix
552 
553    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
554    MatXXXXSetPreallocation() paradgm instead of this routine directly.
555    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
556 
557    Options Database Keys:
558 .   -mat_no_unroll - uses code that does not unroll the loops in the
559                      block calculations (much slower)
560 .    -mat_block_size - size of the blocks to use
561 
562    Level: intermediate
563 
564    Notes:
565    The number of rows and columns must be divisible by blocksize.
566 
567    If the nnz parameter is given then the nz parameter is ignored
568 
569    A nonzero block is any block that as 1 or more nonzeros in it
570 
571    The block AIJ format is fully compatible with standard Fortran 77
572    storage.  That is, the stored row and column indices can begin at
573    either one (as in Fortran) or zero.  See the users' manual for details.
574 
575    Specify the preallocated storage with either nz or nnz (not both).
576    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
577    allocation.  See Users-Manual: ch_mat for details.
578    matrices.
579 
580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
581 
582 @*/
583 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
584 {
585   PetscErrorCode ierr;
586 
587   PetscFunctionBegin;
588   ierr = MatCreate(comm,A);CHKERRQ(ierr);
589   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
590 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
591   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
592   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
593 #else
594   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
595   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
596   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
597 #endif
598   PetscFunctionReturn(0);
599 }
600 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
601 {
602   PetscErrorCode ierr;
603 
604   PetscFunctionBegin;
605   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
606 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
607   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
608 #else
609   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
610 #endif
611   PetscFunctionReturn(0);
612 }
613