xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision bd412c90fba8895e9763ccee76c7dd2e09a982d7)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 #include <mkl_spblas.h>
11 
12 static PetscBool PetscSeqBAIJSupportsZeroBased(void)
13 {
14   static PetscBool set = PETSC_FALSE, value;
15   int              n   = 1, ia[1], ja[1];
16   float            a[1];
17   sparse_status_t  status;
18   sparse_matrix_t  A;
19 
20   if (!set) {
21     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, n, n, n, ia, ia, ja, a);
22     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
23     (void)mkl_sparse_destroy(A);
24     set = PETSC_TRUE;
25   }
26   return value;
27 }
28 
29 typedef struct {
30   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
31   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
32   struct matrix_descr descr;
33   PetscInt           *ai1;
34   PetscInt           *aj1;
35 } Mat_SeqBAIJMKL;
36 
37 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
38 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat, MatAssemblyType);
39 
40 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
41 {
42   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
43   /* so we will ignore 'MatType type'. */
44   Mat             B       = *newmat;
45   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
46 
47   PetscFunctionBegin;
48   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
49 
50   /* Reset the original function pointers. */
51   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
52   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
53   B->ops->destroy          = MatDestroy_SeqBAIJ;
54   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
55   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
56   B->ops->scale            = MatScale_SeqBAIJ;
57   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
58   B->ops->axpy             = MatAXPY_SeqBAIJ;
59 
60   switch (A->rmap->bs) {
61   case 1:
62     B->ops->mult    = MatMult_SeqBAIJ_1;
63     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
64     break;
65   case 2:
66     B->ops->mult    = MatMult_SeqBAIJ_2;
67     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
68     break;
69   case 3:
70     B->ops->mult    = MatMult_SeqBAIJ_3;
71     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
72     break;
73   case 4:
74     B->ops->mult    = MatMult_SeqBAIJ_4;
75     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
76     break;
77   case 5:
78     B->ops->mult    = MatMult_SeqBAIJ_5;
79     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
80     break;
81   case 6:
82     B->ops->mult    = MatMult_SeqBAIJ_6;
83     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
84     break;
85   case 7:
86     B->ops->mult    = MatMult_SeqBAIJ_7;
87     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
88     break;
89   case 11:
90     B->ops->mult    = MatMult_SeqBAIJ_11;
91     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
92     break;
93   case 15:
94     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
95     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
96     break;
97   default:
98     B->ops->mult    = MatMult_SeqBAIJ_N;
99     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
100     break;
101   }
102   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
103 
104   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
105    * simply involves destroying the MKL sparse matrix handle and then freeing
106    * the spptr pointer. */
107   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
108 
109   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
110   PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
111   PetscCall(PetscFree(B->spptr));
112 
113   /* Change the type of B to MATSEQBAIJ. */
114   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
115 
116   *newmat = B;
117   PetscFunctionReturn(0);
118 }
119 
120 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
121 {
122   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
123 
124   PetscFunctionBegin;
125   if (baijmkl) {
126     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
128     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
129     PetscCall(PetscFree(A->spptr));
130   }
131 
132   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
133    * to destroy everything that remains. */
134   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
135   PetscCall(MatDestroy_SeqBAIJ(A));
136   PetscFunctionReturn(0);
137 }
138 
139 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
140 {
141   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
142   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
143   PetscInt        mbs, nbs, nz, bs;
144   MatScalar      *aa;
145   PetscInt       *aj, *ai;
146   PetscInt        i;
147 
148   PetscFunctionBegin;
149   if (baijmkl->sparse_optimized) {
150     /* Matrix has been previously assembled and optimized. Must destroy old
151      * matrix handle before running the optimization step again. */
152     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
153     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
154   }
155   baijmkl->sparse_optimized = PETSC_FALSE;
156 
157   /* Now perform the SpMV2 setup and matrix optimization. */
158   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
159   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
160   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
161   mbs                 = a->mbs;
162   nbs                 = a->nbs;
163   nz                  = a->nz;
164   bs                  = A->rmap->bs;
165   aa                  = a->a;
166 
167   if ((nz != 0) & !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     if (PetscSeqBAIJSupportsZeroBased()) {
171       aj = a->j;
172       ai = a->i;
173       PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA), SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
174     } else {
175       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
176       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
177       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
178       aa = a->a;
179       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, mbs, nbs, bs, ai, ai + 1, aj, aa));
180       baijmkl->ai1 = ai;
181       baijmkl->aj1 = aj;
182     }
183     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
184     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
185     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
186     baijmkl->sparse_optimized = PETSC_TRUE;
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
192 {
193   Mat_SeqBAIJMKL *baijmkl;
194   Mat_SeqBAIJMKL *baijmkl_dest;
195 
196   PetscFunctionBegin;
197   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
198   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
199   PetscCall(PetscNew(&baijmkl_dest));
200   (*M)->spptr = (void *)baijmkl_dest;
201   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
202   baijmkl_dest->sparse_optimized = PETSC_FALSE;
203   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
208 {
209   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
210   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
211   const PetscScalar *x;
212   PetscScalar       *y;
213 
214   PetscFunctionBegin;
215   /* If there are no nonzero entries, zero yy and return immediately. */
216   if (!a->nz) {
217     PetscCall(VecSet(yy, 0.0));
218     PetscFunctionReturn(0);
219   }
220 
221   PetscCall(VecGetArrayRead(xx, &x));
222   PetscCall(VecGetArray(yy, &y));
223 
224   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
225    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
226    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
227   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
228 
229   /* Call MKL SpMV2 executor routine to do the MatMult. */
230   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
231 
232   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
233   PetscCall(VecRestoreArrayRead(xx, &x));
234   PetscCall(VecRestoreArray(yy, &y));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
239 {
240   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
241   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
242   const PetscScalar *x;
243   PetscScalar       *y;
244 
245   PetscFunctionBegin;
246   /* If there are no nonzero entries, zero yy and return immediately. */
247   if (!a->nz) {
248     PetscCall(VecSet(yy, 0.0));
249     PetscFunctionReturn(0);
250   }
251 
252   PetscCall(VecGetArrayRead(xx, &x));
253   PetscCall(VecGetArray(yy, &y));
254 
255   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
256    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
257    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
258   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
259 
260   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
261   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
262 
263   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
264   PetscCall(VecRestoreArrayRead(xx, &x));
265   PetscCall(VecRestoreArray(yy, &y));
266   PetscFunctionReturn(0);
267 }
268 
269 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
270 {
271   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
272   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
273   const PetscScalar *x;
274   PetscScalar       *y, *z;
275   PetscInt           m = a->mbs * A->rmap->bs;
276   PetscInt           i;
277 
278   PetscFunctionBegin;
279   /* If there are no nonzero entries, set zz = yy and return immediately. */
280   if (!a->nz) {
281     PetscCall(VecCopy(yy, zz));
282     PetscFunctionReturn(0);
283   }
284 
285   PetscCall(VecGetArrayRead(xx, &x));
286   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
287 
288   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
289    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
290    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
291   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
292 
293   /* Call MKL sparse BLAS routine to do the MatMult. */
294   if (zz == yy) {
295     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
296      * with alpha and beta both set to 1.0. */
297     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
298   } else {
299     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
300      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
301     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
302     for (i = 0; i < m; i++) z[i] += y[i];
303   }
304 
305   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
306   PetscCall(VecRestoreArrayRead(xx, &x));
307   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
308   PetscFunctionReturn(0);
309 }
310 
311 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
312 {
313   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
314   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
315   const PetscScalar *x;
316   PetscScalar       *y, *z;
317   PetscInt           n = a->nbs * A->rmap->bs;
318   PetscInt           i;
319   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
320 
321   PetscFunctionBegin;
322   /* If there are no nonzero entries, set zz = yy and return immediately. */
323   if (!a->nz) {
324     PetscCall(VecCopy(yy, zz));
325     PetscFunctionReturn(0);
326   }
327 
328   PetscCall(VecGetArrayRead(xx, &x));
329   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
330 
331   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
332    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
333    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
334   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
335 
336   /* Call MKL sparse BLAS routine to do the MatMult. */
337   if (zz == yy) {
338     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
339      * with alpha and beta both set to 1.0. */
340     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
341   } else {
342     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
343      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
344     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
345     for (i = 0; i < n; i++) z[i] += y[i];
346   }
347 
348   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
349   PetscCall(VecRestoreArrayRead(xx, &x));
350   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
351   PetscFunctionReturn(0);
352 }
353 
354 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
355 {
356   PetscFunctionBegin;
357   PetscCall(MatScale_SeqBAIJ(inA, alpha));
358   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
359   PetscFunctionReturn(0);
360 }
361 
362 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
363 {
364   PetscFunctionBegin;
365   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
366   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
367   PetscFunctionReturn(0);
368 }
369 
370 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
371 {
372   PetscFunctionBegin;
373   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
374   if (str == SAME_NONZERO_PATTERN) {
375     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
376     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
377   }
378   PetscFunctionReturn(0);
379 }
380 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
381  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
382  * routine, but can also be used to convert an assembled SeqBAIJ matrix
383  * into a SeqBAIJMKL one. */
384 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
385 {
386   Mat             B = *newmat;
387   Mat_SeqBAIJMKL *baijmkl;
388   PetscBool       sametype;
389 
390   PetscFunctionBegin;
391   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
392 
393   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
394   if (sametype) PetscFunctionReturn(0);
395 
396   PetscCall(PetscNew(&baijmkl));
397   B->spptr = (void *)baijmkl;
398 
399   /* Set function pointers for methods that we inherit from BAIJ but override.
400    * We also parse some command line options below, since those determine some of the methods we point to. */
401   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
402 
403   baijmkl->sparse_optimized = PETSC_FALSE;
404 
405   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_SeqBAIJMKL_C", MatScale_SeqBAIJMKL));
406   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
407 
408   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
409   *newmat = B;
410   PetscFunctionReturn(0);
411 }
412 
413 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
414 {
415   PetscFunctionBegin;
416   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
417   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
418   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
419   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
420   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
421   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
422   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
423   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
424   A->ops->scale            = MatScale_SeqBAIJMKL;
425   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
426   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
427   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
428   PetscFunctionReturn(0);
429 }
430 
431 /*@C
432    MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
433    This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
434    routines from Intel MKL whenever possible.
435    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
436    operations are currently supported.
437    If the installed version of MKL supports the "SpMV2" sparse
438    inspector-executor routines, then those are used by default.
439    Default PETSc kernels are used otherwise.
440 
441    Input Parameters:
442 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
443 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
444           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
445 .  m - number of rows
446 .  n - number of columns
447 .  nz - number of nonzero blocks  per block row (same for all rows)
448 -  nnz - array containing the number of nonzero blocks in the various block rows
449          (possibly different for each block row) or NULL
450 
451    Output Parameter:
452 .  A - the matrix
453 
454    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
455    MatXXXXSetPreallocation() paradigm instead of this routine directly.
456    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
457 
458    Options Database Keys:
459 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
460 -   -mat_block_size - size of the blocks to use
461 
462    Level: intermediate
463 
464    Notes:
465    The number of rows and columns must be divisible by blocksize.
466 
467    If the nnz parameter is given then the nz parameter is ignored
468 
469    A nonzero block is any block that as 1 or more nonzeros in it
470 
471    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
472    storage.  That is, the stored row and column indices can begin at
473    either one (as in Fortran) or zero.  See the users' manual for details.
474 
475    Specify the preallocated storage with either nz or nnz (not both).
476    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
477    allocation.  See [Sparse Matrices](sec_matsparse) for details.
478    matrices.
479 
480 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
481 @*/
482 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
483 {
484   PetscFunctionBegin;
485   PetscCall(MatCreate(comm, A));
486   PetscCall(MatSetSizes(*A, m, n, m, n));
487   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
488   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
489   PetscFunctionReturn(0);
490 }
491 
492 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
493 {
494   PetscFunctionBegin;
495   PetscCall(MatSetType(A, MATSEQBAIJ));
496   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
497   PetscFunctionReturn(0);
498 }
499