xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL verion is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10 #include <mkl_spblas.h>
11 
12 static PetscBool PetscSeqBAIJSupportsZeroBased(void)
13 {
14   static PetscBool set = PETSC_FALSE,value;
15   int              n=1,ia[1],ja[1];
16   float            a[1];
17   sparse_status_t  status;
18   sparse_matrix_t  A;
19 
20   if (!set) {
21     status = mkl_sparse_s_create_bsr(&A,SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,n,n,n,ia,ia,ja,a);
22     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
23     (void)   mkl_sparse_destroy(A);
24     set    = PETSC_TRUE;
25   }
26   return value;
27 }
28 
29 typedef struct {
30   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
31   sparse_matrix_t     bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
32   struct matrix_descr descr;
33   PetscInt            *ai1;
34   PetscInt            *aj1;
35 } Mat_SeqBAIJMKL;
36 
37 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
38 extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
39 
40 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
41 {
42   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
43   /* so we will ignore 'MatType type'. */
44   Mat            B        = *newmat;
45   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
46 
47   PetscFunctionBegin;
48   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
49 
50   /* Reset the original function pointers. */
51   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
52   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
53   B->ops->destroy          = MatDestroy_SeqBAIJ;
54   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
55   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
56   B->ops->scale            = MatScale_SeqBAIJ;
57   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
58   B->ops->axpy             = MatAXPY_SeqBAIJ;
59 
60   switch (A->rmap->bs) {
61     case 1:
62       B->ops->mult    = MatMult_SeqBAIJ_1;
63       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
64       break;
65     case 2:
66       B->ops->mult    = MatMult_SeqBAIJ_2;
67       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
68       break;
69     case 3:
70       B->ops->mult    = MatMult_SeqBAIJ_3;
71       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
72       break;
73     case 4:
74       B->ops->mult    = MatMult_SeqBAIJ_4;
75       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
76       break;
77     case 5:
78       B->ops->mult    = MatMult_SeqBAIJ_5;
79       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
80       break;
81     case 6:
82       B->ops->mult    = MatMult_SeqBAIJ_6;
83       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
84       break;
85     case 7:
86       B->ops->mult    = MatMult_SeqBAIJ_7;
87       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
88       break;
89     case 11:
90       B->ops->mult    = MatMult_SeqBAIJ_11;
91       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
92       break;
93     case 15:
94       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
95       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
96       break;
97     default:
98       B->ops->mult    = MatMult_SeqBAIJ_N;
99       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
100       break;
101   }
102   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL));
103 
104   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
105    * simply involves destroying the MKL sparse matrix handle and then freeing
106    * the spptr pointer. */
107   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
108 
109   if (baijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,baijmkl->bsrA);
110   PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1));
111   PetscCall(PetscFree(B->spptr));
112 
113   /* Change the type of B to MATSEQBAIJ. */
114   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
115 
116   *newmat = B;
117   PetscFunctionReturn(0);
118 }
119 
120 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
121 {
122   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123 
124   PetscFunctionBegin;
125   if (baijmkl) {
126     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127     if (baijmkl->sparse_optimized) PetscStackCallStandard(mkl_sparse_destroy,baijmkl->bsrA);
128     PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1));
129     PetscCall(PetscFree(A->spptr));
130   }
131 
132   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
133    * to destroy everything that remains. */
134   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
135   PetscCall(MatDestroy_SeqBAIJ(A));
136   PetscFunctionReturn(0);
137 }
138 
139 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
140 {
141   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
142   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
143   PetscInt        mbs, nbs, nz, bs;
144   MatScalar       *aa;
145   PetscInt        *aj,*ai;
146   PetscInt        i;
147 
148   PetscFunctionBegin;
149   if (baijmkl->sparse_optimized) {
150     /* Matrix has been previously assembled and optimized. Must destroy old
151      * matrix handle before running the optimization step again. */
152     PetscCall(PetscFree2(baijmkl->ai1,baijmkl->aj1));
153     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
154   }
155   baijmkl->sparse_optimized = PETSC_FALSE;
156 
157   /* Now perform the SpMV2 setup and matrix optimization. */
158   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
159   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
160   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
161   mbs = a->mbs;
162   nbs = a->nbs;
163   nz  = a->nz;
164   bs  = A->rmap->bs;
165   aa  = a->a;
166 
167   if ((nz!=0) & !(A->structure_only)) {
168     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
169      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
170     if (PetscSeqBAIJSupportsZeroBased()) {
171       aj   = a->j;
172       ai   = a->i;
173       PetscCallMKL(mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa));
174     } else {
175       PetscCall(PetscMalloc2(mbs+1,&ai,nz,&aj));
176       for (i=0;i<mbs+1;i++) ai[i] = a->i[i]+1;
177       for (i=0;i<nz;i++) aj[i] = a->j[i]+1;
178       aa   = a->a;
179       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa));
180       baijmkl->ai1 = ai;
181       baijmkl->aj1 = aj;
182     }
183     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000));
184     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE));
185     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
186     baijmkl->sparse_optimized = PETSC_TRUE;
187   }
188   PetscFunctionReturn(0);
189 }
190 
191 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
192 {
193   Mat_SeqBAIJMKL *baijmkl;
194   Mat_SeqBAIJMKL *baijmkl_dest;
195 
196   PetscFunctionBegin;
197   PetscCall(MatDuplicate_SeqBAIJ(A,op,M));
198   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
199   PetscCall(PetscNewLog((*M),&baijmkl_dest));
200   (*M)->spptr = (void*)baijmkl_dest;
201   PetscCall(PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL)));
202   baijmkl_dest->sparse_optimized = PETSC_FALSE;
203   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
208 {
209   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
210   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
211   const PetscScalar  *x;
212   PetscScalar        *y;
213 
214   PetscFunctionBegin;
215   /* If there are no nonzero entries, zero yy and return immediately. */
216   if (!a->nz) {
217     PetscCall(VecSet(yy,0.0));
218     PetscFunctionReturn(0);
219   }
220 
221   PetscCall(VecGetArrayRead(xx,&x));
222   PetscCall(VecGetArray(yy,&y));
223 
224   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
225    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
226    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
227   if (!baijmkl->sparse_optimized) {
228     PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
229   }
230 
231   /* Call MKL SpMV2 executor routine to do the MatMult. */
232   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y));
233 
234   PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs));
235   PetscCall(VecRestoreArrayRead(xx,&x));
236   PetscCall(VecRestoreArray(yy,&y));
237   PetscFunctionReturn(0);
238 }
239 
240 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
241 {
242   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
243   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
244   const PetscScalar *x;
245   PetscScalar       *y;
246 
247   PetscFunctionBegin;
248   /* If there are no nonzero entries, zero yy and return immediately. */
249   if (!a->nz) {
250     PetscCall(VecSet(yy,0.0));
251     PetscFunctionReturn(0);
252   }
253 
254   PetscCall(VecGetArrayRead(xx,&x));
255   PetscCall(VecGetArray(yy,&y));
256 
257   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
258    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
259    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
260   if (!baijmkl->sparse_optimized) {
261     PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
262   }
263 
264   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
265   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y));
266 
267   PetscCall(PetscLogFlops(2.0*a->bs2*a->nz - a->nonzerorowcnt*A->rmap->bs));
268   PetscCall(VecRestoreArrayRead(xx,&x));
269   PetscCall(VecRestoreArray(yy,&y));
270   PetscFunctionReturn(0);
271 }
272 
273 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
274 {
275   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
276   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
277   const PetscScalar  *x;
278   PetscScalar        *y,*z;
279   PetscInt           m=a->mbs*A->rmap->bs;
280   PetscInt           i;
281 
282   PetscFunctionBegin;
283   /* If there are no nonzero entries, set zz = yy and return immediately. */
284   if (!a->nz) {
285     PetscCall(VecCopy(yy,zz));
286     PetscFunctionReturn(0);
287   }
288 
289   PetscCall(VecGetArrayRead(xx,&x));
290   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
291 
292   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
293    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
294    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
295   if (!baijmkl->sparse_optimized) {
296     PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
297   }
298 
299   /* Call MKL sparse BLAS routine to do the MatMult. */
300   if (zz == yy) {
301     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
302      * with alpha and beta both set to 1.0. */
303     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z));
304   } else {
305     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
306      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
307     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z));
308     for (i=0; i<m; i++) {
309       z[i] += y[i];
310     }
311   }
312 
313   PetscCall(PetscLogFlops(2.0*a->bs2*a->nz));
314   PetscCall(VecRestoreArrayRead(xx,&x));
315   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
316   PetscFunctionReturn(0);
317 }
318 
319 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
320 {
321   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
322   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
323   const PetscScalar *x;
324   PetscScalar       *y,*z;
325   PetscInt          n=a->nbs*A->rmap->bs;
326   PetscInt          i;
327   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
328 
329   PetscFunctionBegin;
330   /* If there are no nonzero entries, set zz = yy and return immediately. */
331   if (!a->nz) {
332     PetscCall(VecCopy(yy,zz));
333     PetscFunctionReturn(0);
334   }
335 
336   PetscCall(VecGetArrayRead(xx,&x));
337   PetscCall(VecGetArrayPair(yy,zz,&y,&z));
338 
339   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
340    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
341    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
342   if (!baijmkl->sparse_optimized) {
343     PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
344   }
345 
346   /* Call MKL sparse BLAS routine to do the MatMult. */
347   if (zz == yy) {
348     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
349      * with alpha and beta both set to 1.0. */
350     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z));
351   } else {
352     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
353      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
354     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z));
355     for (i=0; i<n; i++) {
356       z[i] += y[i];
357     }
358   }
359 
360   PetscCall(PetscLogFlops(2.0*a->bs2*a->nz));
361   PetscCall(VecRestoreArrayRead(xx,&x));
362   PetscCall(VecRestoreArrayPair(yy,zz,&y,&z));
363   PetscFunctionReturn(0);
364 }
365 
366 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
367 {
368   PetscFunctionBegin;
369   PetscCall(MatScale_SeqBAIJ(inA,alpha));
370   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
371   PetscFunctionReturn(0);
372 }
373 
374 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
375 {
376   PetscFunctionBegin;
377   PetscCall(MatDiagonalScale_SeqBAIJ(A,ll,rr));
378   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
379   PetscFunctionReturn(0);
380 }
381 
382 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
383 {
384   PetscFunctionBegin;
385   PetscCall(MatAXPY_SeqBAIJ(Y,a,X,str));
386   if (str == SAME_NONZERO_PATTERN) {
387     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
388     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
389   }
390   PetscFunctionReturn(0);
391 }
392 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
393  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
394  * routine, but can also be used to convert an assembled SeqBAIJ matrix
395  * into a SeqBAIJMKL one. */
396 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
397 {
398   Mat            B = *newmat;
399   Mat_SeqBAIJMKL *baijmkl;
400   PetscBool      sametype;
401 
402   PetscFunctionBegin;
403   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A,MAT_COPY_VALUES,&B));
404 
405   PetscCall(PetscObjectTypeCompare((PetscObject)A,type,&sametype));
406   if (sametype) PetscFunctionReturn(0);
407 
408   PetscCall(PetscNewLog(B,&baijmkl));
409   B->spptr = (void*)baijmkl;
410 
411   /* Set function pointers for methods that we inherit from BAIJ but override.
412    * We also parse some command line options below, since those determine some of the methods we point to. */
413   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
414 
415   baijmkl->sparse_optimized = PETSC_FALSE;
416 
417   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL));
418   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ));
419 
420   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL));
421   *newmat = B;
422   PetscFunctionReturn(0);
423 }
424 
425 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
426 {
427   PetscFunctionBegin;
428   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
429   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
430   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
431   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
432   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
433   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
434   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
435   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
436   A->ops->scale            = MatScale_SeqBAIJMKL;
437   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
438   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
439   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
440   PetscFunctionReturn(0);
441 }
442 
443 /*@C
444    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
445    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
446    routines from Intel MKL whenever possible.
447    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
448    operations are currently supported.
449    If the installed version of MKL supports the "SpMV2" sparse
450    inspector-executor routines, then those are used by default.
451    Default PETSc kernels are used otherwise.
452 
453    Input Parameters:
454 +  comm - MPI communicator, set to PETSC_COMM_SELF
455 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
456           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
457 .  m - number of rows
458 .  n - number of columns
459 .  nz - number of nonzero blocks  per block row (same for all rows)
460 -  nnz - array containing the number of nonzero blocks in the various block rows
461          (possibly different for each block row) or NULL
462 
463    Output Parameter:
464 .  A - the matrix
465 
466    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
467    MatXXXXSetPreallocation() paradigm instead of this routine directly.
468    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
469 
470    Options Database Keys:
471 +   -mat_no_unroll - uses code that does not unroll the loops in the
472                      block calculations (much slower)
473 -   -mat_block_size - size of the blocks to use
474 
475    Level: intermediate
476 
477    Notes:
478    The number of rows and columns must be divisible by blocksize.
479 
480    If the nnz parameter is given then the nz parameter is ignored
481 
482    A nonzero block is any block that as 1 or more nonzeros in it
483 
484    The block AIJ format is fully compatible with standard Fortran 77
485    storage.  That is, the stored row and column indices can begin at
486    either one (as in Fortran) or zero.  See the users' manual for details.
487 
488    Specify the preallocated storage with either nz or nnz (not both).
489    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
490    allocation.  See Users-Manual: ch_mat for details.
491    matrices.
492 
493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
494 
495 @*/
496 PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
497 {
498   PetscFunctionBegin;
499   PetscCall(MatCreate(comm,A));
500   PetscCall(MatSetSizes(*A,m,n,m,n));
501   PetscCall(MatSetType(*A,MATSEQBAIJMKL));
502   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz));
503   PetscFunctionReturn(0);
504 }
505 
506 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
507 {
508   PetscFunctionBegin;
509   PetscCall(MatSetType(A,MATSEQBAIJ));
510   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A));
511   PetscFunctionReturn(0);
512 }
513