xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision f13dfd9ea68e0ddeee984e65c377a1819eab8a8a)
/*
  Defines basic operations for the MATSEQBAIJMKL matrix class.
  Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible. If the version of MKL in use is older than 11.3,
  PETSc's default code for sparse matrix operations is used instead.
*/
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11   #define MKL_ILP64
12 #endif
13 #include <mkl_spblas.h>
14 
15 static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16 {
17   static PetscBool set = PETSC_FALSE, value;
18   int              n   = 1, ia[1], ja[1];
19   float            a[1];
20   sparse_status_t  status;
21   sparse_matrix_t  A;
22 
23   if (!set) {
24     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26     (void)mkl_sparse_destroy(A);
27     set = PETSC_TRUE;
28   }
29   return value;
30 }
31 
/* Extra per-matrix state for MATSEQBAIJMKL, hung off of Mat->spptr. It tracks
 * the MKL inspector-executor handle built from the underlying BAIJ storage. */
typedef struct {
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;            /* Matrix descriptor (type/fill-mode/diag) passed to the MKL executor routines. */
  PetscInt           *ai1;              /* One-based copy of the BAIJ row pointers; allocated only when MKL lacks zero-based BSR support. */
  PetscInt           *aj1;              /* One-based copy of the BAIJ column indices (same condition as ai1). */
} Mat_SeqBAIJMKL;
39 
40 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
41 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
42 
/* Convert a MATSEQBAIJMKL matrix back to plain MATSEQBAIJ: restore the
 * SeqBAIJ function-pointer table, free the MKL-specific state in spptr,
 * and change the type name. */
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat             B       = *newmat;
  Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqBAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
  B->ops->destroy          = MatDestroy_SeqBAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
  B->ops->scale            = MatScale_SeqBAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
  B->ops->axpy             = MatAXPY_SeqBAIJ;

  /* Restore the block-size-specialized SeqBAIJ mult/multadd kernels. */
  switch (A->rmap->bs) {
  case 1:
    B->ops->mult    = MatMult_SeqBAIJ_1;
    B->ops->multadd = MatMultAdd_SeqBAIJ_1;
    break;
  case 2:
    B->ops->mult    = MatMult_SeqBAIJ_2;
    B->ops->multadd = MatMultAdd_SeqBAIJ_2;
    break;
  case 3:
    B->ops->mult    = MatMult_SeqBAIJ_3;
    B->ops->multadd = MatMultAdd_SeqBAIJ_3;
    break;
  case 4:
    B->ops->mult    = MatMult_SeqBAIJ_4;
    B->ops->multadd = MatMultAdd_SeqBAIJ_4;
    break;
  case 5:
    B->ops->mult    = MatMult_SeqBAIJ_5;
    B->ops->multadd = MatMultAdd_SeqBAIJ_5;
    break;
  case 6:
    B->ops->mult    = MatMult_SeqBAIJ_6;
    B->ops->multadd = MatMultAdd_SeqBAIJ_6;
    break;
  case 7:
    B->ops->mult    = MatMult_SeqBAIJ_7;
    B->ops->multadd = MatMultAdd_SeqBAIJ_7;
    break;
  case 11:
    B->ops->mult    = MatMult_SeqBAIJ_11;
    B->ops->multadd = MatMultAdd_SeqBAIJ_11;
    break;
  case 15:
    /* bs 15 has a specialized mult kernel but only the generic multadd. */
    B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
    B->ops->multadd = MatMultAdd_SeqBAIJ_N;
    break;
  default:
    B->ops->mult    = MatMult_SeqBAIJ_N;
    B->ops->multadd = MatMultAdd_SeqBAIJ_N;
    break;
  }
  /* Remove the conversion hook so the plain SeqBAIJ no longer advertises it. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));

  /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;

  if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
  PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQBAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
122 
/* Destroy a MATSEQBAIJMKL matrix: release the MKL handle and one-based index
 * copies, then delegate the remaining cleanup to the SeqBAIJ destructor. */
static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
{
  Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;

  PetscFunctionBegin;
  if (baijmkl) {
    /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
    if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
    PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
    PetscCall(PetscFree(A->spptr));
  }

  /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
   * to destroy everything that remains. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
  PetscCall(MatDestroy_SeqBAIJ(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
141 
142 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
143 {
144   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
145   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
146   PetscInt        mbs, nbs, nz, bs;
147   MatScalar      *aa;
148   PetscInt       *aj, *ai;
149   PetscInt        i;
150 
151   PetscFunctionBegin;
152   if (baijmkl->sparse_optimized) {
153     /* Matrix has been previously assembled and optimized. Must destroy old
154      * matrix handle before running the optimization step again. */
155     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
156     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
157   }
158   baijmkl->sparse_optimized = PETSC_FALSE;
159 
160   /* Now perform the SpMV2 setup and matrix optimization. */
161   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
162   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
163   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
164   mbs                 = a->mbs;
165   nbs                 = a->nbs;
166   nz                  = a->nz;
167   bs                  = A->rmap->bs;
168   aa                  = a->a;
169 
170   if ((nz != 0) & !A->structure_only) {
171     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
172      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
173     if (PetscSeqBAIJSupportsZeroBased()) {
174       aj = a->j;
175       ai = a->i;
176       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
177     } else {
178       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
179       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
180       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
181       aa = a->a;
182       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
183       baijmkl->ai1 = ai;
184       baijmkl->aj1 = aj;
185     }
186     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
187     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
188     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
189     baijmkl->sparse_optimized = PETSC_TRUE;
190   }
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
/* Duplicate a MATSEQBAIJMKL matrix. The BAIJ data is duplicated by the base
 * class; the MKL-specific state is shallow-copied and the duplicate is marked
 * unoptimized so it builds its own MKL handle lazily on first use. */
static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqBAIJMKL *baijmkl;
  Mat_SeqBAIJMKL *baijmkl_dest;

  PetscFunctionBegin;
  PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
  baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
  PetscCall(PetscNew(&baijmkl_dest));
  (*M)->spptr = (void *)baijmkl_dest;
  /* Copy descr and the other fields; the MKL handle itself must not be shared,
   * so the duplicate is flagged as not yet optimized. */
  PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
  baijmkl_dest->sparse_optimized = PETSC_FALSE;
  /* NOTE(review): this rebuilds the handle for the SOURCE matrix A, not for
   * the duplicate *M; *M relies on the lazy creation triggered by the flag
   * above. Confirm rebuilding A's handle here is intentional. */
  PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
209 
/* yy = A*xx using the MKL SpMV2 inspector-executor kernel. */
static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecSet(yy, 0.0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMult: y = 1.0*A*x + 0.0*y. */
  PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));

  PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
240 
/* yy = A^T*xx using the MKL SpMV2 inspector-executor kernel. */
static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;

  PetscFunctionBegin;
  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecSet(yy, 0.0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(yy, &y));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose: y = 1.0*A^T*x + 0.0*y. */
  PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));

  PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
271 
/* zz = A*xx + yy using the MKL SpMV2 executor. MKL has no fused multiply-add,
 * so when zz != yy the vector addition is done in an explicit loop. */
static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           m = a->mbs * A->rmap->bs; /* number of scalar rows = block rows * block size */
  PetscInt           i;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecCopy(yy, zz));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
    for (i = 0; i < m; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
313 
/* zz = A^T*xx + yy using the MKL SpMV2 executor. As in the non-transpose
 * version, the add is fused only when zz aliases yy. */
static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
  Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  /* Output of the transpose product has one entry per scalar column; blocks
   * are square, so block column count * row block size gives that length. */
  PetscInt           n = a->nbs * A->rmap->bs;
  PetscInt           i;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecCopy(yy, zz));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
356 
/* Scale the matrix values via the SeqBAIJ implementation, then rebuild the
 * MKL handle since it caches the (now stale) values. */
static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
{
  PetscFunctionBegin;
  PetscCall(MatScale_SeqBAIJ(inA, alpha));
  PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
364 
/* Apply left/right diagonal scaling via the SeqBAIJ implementation, then
 * rebuild the MKL handle so it reflects the modified values. */
static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
{
  PetscFunctionBegin;
  PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
  PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
372 
/* Y = a*X + Y via the SeqBAIJ implementation. When the nonzero pattern
 * changes, MatAssemblyEnd() will rebuild the MKL handle; for
 * SAME_NONZERO_PATTERN no assembly happens, so rebuild it here. */
static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  PetscFunctionBegin;
  PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
  if (str == SAME_NONZERO_PATTERN) {
    /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
    PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
 * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqBAIJMKL()
 * routine, but can also be used to convert an assembled SeqBAIJ matrix
 * into a SeqBAIJMKL one. */
/* Convert a SeqBAIJ matrix to MATSEQBAIJMKL: attach the Mat_SeqBAIJMKL state
 * to spptr and override assemblyend; the remaining MKL-specific function
 * pointers are installed by MatAssemblyEnd_SeqBAIJMKL() once the matrix is
 * assembled. */
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat             B = *newmat;
  Mat_SeqBAIJMKL *baijmkl;
  PetscBool       sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* NOTE(review): if A is already of the requested type we return here
   * without assigning *newmat; with MAT_INITIAL_MATRIX the duplicate B made
   * above would then be leaked. Confirm this path is unreachable in practice. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(&baijmkl));
  B->spptr = (void *)baijmkl;

  /* Set function pointers for methods that we inherit from BAIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;

  /* No MKL handle yet; it is built at assembly time (or lazily at first use). */
  baijmkl->sparse_optimized = PETSC_FALSE;

  /* Compose the MKL-aware scale and the back-conversion hook on the object. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
415 
/* Finish assembly via the SeqBAIJ routine, build the MKL handle from the
 * fresh data, and install the MKL-specific operations. Flush assemblies are
 * a no-op since the data is not final yet. */
static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscFunctionBegin;
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
  PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
  /* Now that the matrix is assembled, route operations through the MKL kernels. */
  A->ops->destroy          = MatDestroy_SeqBAIJMKL;
  A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
  A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
  A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
  A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
  A->ops->scale            = MatScale_SeqBAIJMKL;
  A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
  A->ops->axpy             = MatAXPY_SeqBAIJMKL;
  A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
433 
/*@C
  MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
  This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
  routines from Intel MKL whenever possible.

  Input Parameters:
+ comm - MPI communicator, set to `PETSC_COMM_SELF`
. bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m    - number of rows
. n    - number of columns
. nz   - number of nonzero blocks per block row (same for all rows)
- nnz  - array containing the number of nonzero blocks in the various block rows
         (possibly different for each block row) or `NULL`

  Output Parameter:
. A - the matrix

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  Options Database Keys:
+ -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
- -mat_block_size - size of the blocks to use

  Level: intermediate

  Notes:
  The number of rows and columns must be divisible by blocksize.

  If the `nnz` parameter is given then the `nz` parameter is ignored

  A nonzero block is any block that has 1 or more nonzeros in it

  `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
  operations are currently supported.
  If the installed version of MKL supports the "SpMV2" sparse
  inspector-executor routines, then those are used by default.
  Default PETSc kernels are used otherwise.

  The `MATSEQBAIJ` format is fully compatible with standard Fortran
  storage.  That is, the stored row and column indices can begin at
  either one (as in Fortran) or zero.  See the users' manual for details.

  Specify the preallocated storage with either nz or nnz (not both).
  Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
  allocation.  See [Sparse Matrices](sec_matsparse) for details.

.seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  /* Sequential matrix: local sizes equal global sizes. */
  PetscCall(MatSetSizes(*A, m, n, m, n));
  PetscCall(MatSetType(*A, MATSEQBAIJMKL));
  PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
495 
/* Type-registration constructor for MATSEQBAIJMKL: build the matrix as a
 * plain SeqBAIJ, then convert it in place to the MKL variant. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATSEQBAIJ));
  PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
503