xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 
11 
12 /* MKL include files. */
13 #include <mkl_spblas.h>  /* Sparse BLAS */
14 
15 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16 typedef struct {
17   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19   struct matrix_descr descr;
20 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21   PetscInt *ai1;
22   PetscInt *aj1;
23 #endif
24 } Mat_SeqBAIJMKL;
25 
26 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
28 
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
30 {
31   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32   /* so we will ignore 'MatType type'. */
33   PetscErrorCode ierr;
34   Mat            B        = *newmat;
35   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
36 
37   PetscFunctionBegin;
38   if (reuse == MAT_INITIAL_MATRIX) {
39     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
40   }
41 
42   /* Reset the original function pointers. */
43   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
44   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
45   B->ops->destroy          = MatDestroy_SeqBAIJ;
46   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
47   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48   B->ops->scale            = MatScale_SeqBAIJ;
49   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
50   B->ops->axpy             = MatAXPY_SeqBAIJ;
51 
52   switch (A->rmap->bs) {
53     case 1:
54       B->ops->mult    = MatMult_SeqBAIJ_1;
55       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56       break;
57     case 2:
58       B->ops->mult    = MatMult_SeqBAIJ_2;
59       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60       break;
61     case 3:
62       B->ops->mult    = MatMult_SeqBAIJ_3;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64       break;
65     case 4:
66       B->ops->mult    = MatMult_SeqBAIJ_4;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68       break;
69     case 5:
70       B->ops->mult    = MatMult_SeqBAIJ_5;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72       break;
73     case 6:
74       B->ops->mult    = MatMult_SeqBAIJ_6;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76       break;
77     case 7:
78       B->ops->mult    = MatMult_SeqBAIJ_7;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80       break;
81     case 11:
82       B->ops->mult    = MatMult_SeqBAIJ_11;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84       break;
85     case 15:
86       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88       break;
89     default:
90       B->ops->mult    = MatMult_SeqBAIJ_N;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92       break;
93   }
94   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
95 
96   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97    * simply involves destroying the MKL sparse matrix handle and then freeing
98    * the spptr pointer. */
99   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
100 
101   if (baijmkl->sparse_optimized) {
102     sparse_status_t stat;
103     stat = mkl_sparse_destroy(baijmkl->bsrA);
104     if (stat != SPARSE_STATUS_SUCCESS) {
105       PetscFunctionReturn(PETSC_ERR_LIB);
106     }
107   }
108 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
109    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
110    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
111 #endif
112   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
113 
114   /* Change the type of B to MATSEQBAIJ. */
115   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
116 
117   *newmat = B;
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
122 {
123   PetscErrorCode ierr;
124   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
125 
126   PetscFunctionBegin;
127   if (baijmkl) {
128     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129     if (baijmkl->sparse_optimized) {
130       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
131       stat = mkl_sparse_destroy(baijmkl->bsrA);
132       if (stat != SPARSE_STATUS_SUCCESS) {
133         PetscFunctionReturn(PETSC_ERR_LIB);
134       }
135     }
136 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
137    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
138    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
139 #endif
140     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
141   }
142 
143   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
144    * to destroy everything that remains. */
145   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
146   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
151 {
152   PetscErrorCode ierr;
153   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
154   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155   PetscInt        mbs, nbs, nz, bs, i;
156   MatScalar       *aa;
157   PetscInt        *aj,*ai;
158   sparse_status_t stat;
159 
160   PetscFunctionBegin;
161   if (baijmkl->sparse_optimized) {
162     /* Matrix has been previously assembled and optimized. Must destroy old
163      * matrix handle before running the optimization step again. */
164 #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
165     ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
166     ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
167 #endif
168     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
169   }
170   baijmkl->sparse_optimized = PETSC_FALSE;
171 
172   /* Now perform the SpMV2 setup and matrix optimization. */
173   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
174   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
175   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
176   mbs = a->mbs;
177   nbs = a->nbs;
178   nz  = a->nz;
179   bs  = A->rmap->bs;
180   aa  = a->a;
181 
182   if ((nz!=0) & !(A->structure_only)) {
183     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
184      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
185 #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
186     aj   = a->j;
187     ai   = a->i;
188     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
189 #else
190     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
191     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
192     for (i=0;i<mbs+1;i++)
193       ai[i] = a->i[i]+1;
194     for (i=0;i<nz;i++)
195       aj[i] = a->j[i]+1;
196     aa   = a->a;
197     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
198     baijmkl->ai1 = ai;
199     baijmkl->aj1 = aj;
200 #endif
201     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
202     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
203     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
204     baijmkl->sparse_optimized = PETSC_TRUE;
205   }
206   PetscFunctionReturn(0);
207 }
208 
209 PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
210 {
211   PetscErrorCode ierr;
212   Mat_SeqBAIJMKL *baijmkl;
213   Mat_SeqBAIJMKL *baijmkl_dest;
214 
215   PetscFunctionBegin;
216   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
217   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
218   ierr = PetscNewLog((*M),&baijmkl_dest);CHKERRQ(ierr);
219   (*M)->spptr = (void*)baijmkl_dest;
220   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
221   baijmkl_dest->sparse_optimized = PETSC_FALSE;
222   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
227 {
228   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
229   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
230   const PetscScalar  *x;
231   PetscScalar        *y;
232   PetscErrorCode     ierr;
233   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
234 
235   PetscFunctionBegin;
236   /* If there are no nonzero entries, zero yy and return immediately. */
237   if(!a->nz) {
238     PetscInt i;
239     PetscInt m=a->mbs*A->rmap->bs;
240     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
241     for (i=0; i<m; i++) {
242       y[i] = 0.0;
243     }
244     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
245     PetscFunctionReturn(0);
246   }
247 
248   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
249   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
250 
251   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
252    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
253    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
254   if (!baijmkl->sparse_optimized) {
255     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
256   }
257 
258   /* Call MKL SpMV2 executor routine to do the MatMult. */
259   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
260 
261   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
262   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
263   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
268 {
269   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
270   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
271   const PetscScalar *x;
272   PetscScalar       *y;
273   PetscErrorCode    ierr;
274   sparse_status_t   stat;
275 
276   PetscFunctionBegin;
277   /* If there are no nonzero entries, zero yy and return immediately. */
278   if(!a->nz) {
279     PetscInt i;
280     PetscInt n=a->nbs;
281     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
282     for (i=0; i<n; i++) {
283       y[i] = 0.0;
284     }
285     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
286     PetscFunctionReturn(0);
287   }
288 
289   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
290   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
291 
292   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
293    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
294    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
295   if (!baijmkl->sparse_optimized) {
296     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
297   }
298 
299   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
300   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
301 
302   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
303   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
304   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
309 {
310   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
311   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
312   const PetscScalar  *x;
313   PetscScalar        *y,*z;
314   PetscErrorCode     ierr;
315   PetscInt           m=a->mbs*A->rmap->bs;
316   PetscInt           i;
317 
318   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
319 
320   PetscFunctionBegin;
321   /* If there are no nonzero entries, set zz = yy and return immediately. */
322   if(!a->nz) {
323     PetscInt i;
324     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
325     for (i=0; i<m; i++) {
326       z[i] = y[i];
327     }
328     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
329     PetscFunctionReturn(0);
330   }
331 
332   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
333   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
334 
335   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
336    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
337    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
338   if (!baijmkl->sparse_optimized) {
339     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
340   }
341 
342   /* Call MKL sparse BLAS routine to do the MatMult. */
343   if (zz == yy) {
344     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
345      * with alpha and beta both set to 1.0. */
346     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
347   } else {
348     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
349      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
350     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
351     for (i=0; i<m; i++) {
352       z[i] += y[i];
353     }
354   }
355 
356   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
357   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
358   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
363 {
364   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
365   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
366   const PetscScalar *x;
367   PetscScalar       *y,*z;
368   PetscErrorCode    ierr;
369   PetscInt          n=a->nbs*A->rmap->bs;
370   PetscInt          i;
371   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
372   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
373 
374   PetscFunctionBegin;
375   /* If there are no nonzero entries, set zz = yy and return immediately. */
376   if(!a->nz) {
377     PetscInt i;
378     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
379     for (i=0; i<n; i++) {
380       z[i] = y[i];
381     }
382     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
383     PetscFunctionReturn(0);
384   }
385 
386   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
387   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
388 
389   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
390    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
391    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
392   if (!baijmkl->sparse_optimized) {
393     ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
394   }
395 
396   /* Call MKL sparse BLAS routine to do the MatMult. */
397   if (zz == yy) {
398     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
399      * with alpha and beta both set to 1.0. */
400     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
401   } else {
402     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
403      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
404     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
405     for (i=0; i<n; i++) {
406       z[i] += y[i];
407     }
408   }
409 
410   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
411   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
412   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
413   PetscFunctionReturn(0);
414 }
415 
416 PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
417 {
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
422   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
427 {
428   PetscErrorCode ierr;
429 
430   PetscFunctionBegin;
431   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
432   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
433   PetscFunctionReturn(0);
434 }
435 
436 PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
437 {
438   PetscErrorCode ierr;
439 
440   PetscFunctionBegin;
441   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
442   if (str == SAME_NONZERO_PATTERN) {
443     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
444     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
445   }
446   PetscFunctionReturn(0);
447 }
448 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
449  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
450  * routine, but can also be used to convert an assembled SeqBAIJ matrix
451  * into a SeqBAIJMKL one. */
452 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
453 {
454   PetscErrorCode ierr;
455   Mat            B = *newmat;
456   Mat_SeqBAIJMKL *baijmkl;
457   PetscBool      sametype;
458 
459   PetscFunctionBegin;
460   if (reuse == MAT_INITIAL_MATRIX) {
461     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
462   }
463 
464   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
465   if (sametype) PetscFunctionReturn(0);
466 
467   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
468   B->spptr = (void*)baijmkl;
469 
470   /* Set function pointers for methods that we inherit from BAIJ but override.
471    * We also parse some command line options below, since those determine some of the methods we point to. */
472   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
473 
474   baijmkl->sparse_optimized = PETSC_FALSE;
475 
476   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
477   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
478 
479   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
480   *newmat = B;
481   PetscFunctionReturn(0);
482 }
483 PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
484 {
485   PetscErrorCode  ierr;
486   PetscFunctionBegin;
487   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
488   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
489   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
490   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
491   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
492   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
493   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
494   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
495   A->ops->scale            = MatScale_SeqBAIJMKL;
496   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
497   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
498   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
499   PetscFunctionReturn(0);
500 }
501 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
502 /*@C
503    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
504    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
505    routines from Intel MKL whenever possible.
506    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
507    operations are currently supported.
508    If the installed version of MKL supports the "SpMV2" sparse
509    inspector-executor routines, then those are used by default.
510    Default PETSc kernels are used otherwise.
511 
512    Input Parameters:
513 +  comm - MPI communicator, set to PETSC_COMM_SELF
514 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
515           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
516 .  m - number of rows
517 .  n - number of columns
518 .  nz - number of nonzero blocks  per block row (same for all rows)
519 -  nnz - array containing the number of nonzero blocks in the various block rows
520          (possibly different for each block row) or NULL
521 
522 
523    Output Parameter:
524 .  A - the matrix
525 
526    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
527    MatXXXXSetPreallocation() paradgm instead of this routine directly.
528    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
529 
530    Options Database Keys:
531 .   -mat_no_unroll - uses code that does not unroll the loops in the
532                      block calculations (much slower)
533 .    -mat_block_size - size of the blocks to use
534 
535    Level: intermediate
536 
537    Notes:
538    The number of rows and columns must be divisible by blocksize.
539 
540    If the nnz parameter is given then the nz parameter is ignored
541 
542    A nonzero block is any block that as 1 or more nonzeros in it
543 
544    The block AIJ format is fully compatible with standard Fortran 77
545    storage.  That is, the stored row and column indices can begin at
546    either one (as in Fortran) or zero.  See the users' manual for details.
547 
548    Specify the preallocated storage with either nz or nnz (not both).
549    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
550    allocation.  See Users-Manual: ch_mat for details.
551    matrices.
552 
553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
554 
555 @*/
556 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
557 {
558   PetscErrorCode ierr;
559 
560   PetscFunctionBegin;
561   ierr = MatCreate(comm,A);CHKERRQ(ierr);
562   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
563 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
564   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
565   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
566 #else
567   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
568   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
569   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
570 #endif
571   PetscFunctionReturn(0);
572 }
573 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
574 {
575   PetscErrorCode ierr;
576 
577   PetscFunctionBegin;
578   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
579 #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
580   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
581 #else
582   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
583 #endif
584   PetscFunctionReturn(0);
585 }
586