xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision 7072be85568e48e0f9849a83abce647ecf3f07b4)
1*7072be85SIrina Sokolova /*
2*7072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
3*7072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4*7072be85SIrina Sokolova   wherever possible. If used MKL verion is older than 11.3 PETSc default
5*7072be85SIrina Sokolova   code for sparse matrix operations is used.
6*7072be85SIrina Sokolova */
7*7072be85SIrina Sokolova 
8*7072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
9*7072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>
10*7072be85SIrina Sokolova 
11*7072be85SIrina Sokolova 
12*7072be85SIrina Sokolova /* MKL include files. */
13*7072be85SIrina Sokolova #include <mkl_spblas.h>  /* Sparse BLAS */
14*7072be85SIrina Sokolova 
15*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16*7072be85SIrina Sokolova typedef struct {
17*7072be85SIrina Sokolova   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18*7072be85SIrina Sokolova   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19*7072be85SIrina Sokolova   struct matrix_descr descr;
20*7072be85SIrina Sokolova #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21*7072be85SIrina Sokolova   PetscInt *ai1;
22*7072be85SIrina Sokolova   PetscInt *aj1;
23*7072be85SIrina Sokolova #endif
24*7072be85SIrina Sokolova } Mat_SeqBAIJMKL;
25*7072be85SIrina Sokolova 
26*7072be85SIrina Sokolova extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
27*7072be85SIrina Sokolova 
28*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
29*7072be85SIrina Sokolova {
30*7072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
31*7072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
32*7072be85SIrina Sokolova   PetscErrorCode ierr;
33*7072be85SIrina Sokolova   Mat            B       = *newmat;
34*7072be85SIrina Sokolova   Mat_SeqBAIJMKL  *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
35*7072be85SIrina Sokolova 
36*7072be85SIrina Sokolova   PetscFunctionBegin;
37*7072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
38*7072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
39*7072be85SIrina Sokolova   }
40*7072be85SIrina Sokolova 
41*7072be85SIrina Sokolova   /* Reset the original function pointers. */
42*7072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
43*7072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
44*7072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
45*7072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
46*7072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
47*7072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
48*7072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
49*7072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
50*7072be85SIrina Sokolova 
51*7072be85SIrina Sokolova   switch (A->rmap->bs) {
52*7072be85SIrina Sokolova     case 1:
53*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_1;
54*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
55*7072be85SIrina Sokolova       break;
56*7072be85SIrina Sokolova     case 2:
57*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_2;
58*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
59*7072be85SIrina Sokolova       break;
60*7072be85SIrina Sokolova     case 3:
61*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_3;
62*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
63*7072be85SIrina Sokolova       break;
64*7072be85SIrina Sokolova     case 4:
65*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_4;
66*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
67*7072be85SIrina Sokolova       break;
68*7072be85SIrina Sokolova     case 5:
69*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_5;
70*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
71*7072be85SIrina Sokolova       break;
72*7072be85SIrina Sokolova     case 6:
73*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_6;
74*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
75*7072be85SIrina Sokolova       break;
76*7072be85SIrina Sokolova     case 7:
77*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_7;
78*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
79*7072be85SIrina Sokolova       break;
80*7072be85SIrina Sokolova     case 11:
81*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_11;
82*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
83*7072be85SIrina Sokolova       break;
84*7072be85SIrina Sokolova     case 15:
85*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
86*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
87*7072be85SIrina Sokolova       break;
88*7072be85SIrina Sokolova     default:
89*7072be85SIrina Sokolova       B->ops->mult    = MatMult_SeqBAIJ_N;
90*7072be85SIrina Sokolova       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
91*7072be85SIrina Sokolova       break;
92*7072be85SIrina Sokolova   }
93*7072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);CHKERRQ(ierr);
94*7072be85SIrina Sokolova 
95*7072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
96*7072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
97*7072be85SIrina Sokolova    * the spptr pointer. */
98*7072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
99*7072be85SIrina Sokolova 
100*7072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
101*7072be85SIrina Sokolova     sparse_status_t stat;
102*7072be85SIrina Sokolova     stat = mkl_sparse_destroy(baijmkl->bsrA);
103*7072be85SIrina Sokolova     if (stat != SPARSE_STATUS_SUCCESS) {
104*7072be85SIrina Sokolova       PetscFunctionReturn(PETSC_ERR_LIB);
105*7072be85SIrina Sokolova     }
106*7072be85SIrina Sokolova   }
107*7072be85SIrina Sokolova #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
108*7072be85SIrina Sokolova    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
109*7072be85SIrina Sokolova    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
110*7072be85SIrina Sokolova #endif
111*7072be85SIrina Sokolova   ierr = PetscFree(B->spptr);CHKERRQ(ierr);
112*7072be85SIrina Sokolova 
113*7072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
114*7072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);CHKERRQ(ierr);
115*7072be85SIrina Sokolova 
116*7072be85SIrina Sokolova   *newmat = B;
117*7072be85SIrina Sokolova   PetscFunctionReturn(0);
118*7072be85SIrina Sokolova }
119*7072be85SIrina Sokolova 
120*7072be85SIrina Sokolova PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
121*7072be85SIrina Sokolova {
122*7072be85SIrina Sokolova   PetscErrorCode ierr;
123*7072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
124*7072be85SIrina Sokolova 
125*7072be85SIrina Sokolova   PetscFunctionBegin;
126*7072be85SIrina Sokolova 
127*7072be85SIrina Sokolova   if (baijmkl) {
128*7072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129*7072be85SIrina Sokolova     if (baijmkl->sparse_optimized) {
130*7072be85SIrina Sokolova       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
131*7072be85SIrina Sokolova       stat = mkl_sparse_destroy(baijmkl->bsrA);
132*7072be85SIrina Sokolova       if (stat != SPARSE_STATUS_SUCCESS) {
133*7072be85SIrina Sokolova         PetscFunctionReturn(PETSC_ERR_LIB);
134*7072be85SIrina Sokolova       }
135*7072be85SIrina Sokolova     }
136*7072be85SIrina Sokolova #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
137*7072be85SIrina Sokolova    ierr = PetscFree(baijmkl->ai1);CHKERRQ(ierr);
138*7072be85SIrina Sokolova    ierr = PetscFree(baijmkl->aj1);CHKERRQ(ierr);
139*7072be85SIrina Sokolova #endif
140*7072be85SIrina Sokolova     ierr = PetscFree(A->spptr);CHKERRQ(ierr);
141*7072be85SIrina Sokolova   }
142*7072be85SIrina Sokolova 
143*7072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
144*7072be85SIrina Sokolova    * to destroy everything that remains. */
145*7072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);CHKERRQ(ierr);
146*7072be85SIrina Sokolova   ierr = MatDestroy_SeqBAIJ(A);CHKERRQ(ierr);
147*7072be85SIrina Sokolova   PetscFunctionReturn(0);
148*7072be85SIrina Sokolova }
149*7072be85SIrina Sokolova 
150*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
151*7072be85SIrina Sokolova {
152*7072be85SIrina Sokolova   PetscErrorCode ierr;
153*7072be85SIrina Sokolova   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
154*7072be85SIrina Sokolova   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
155*7072be85SIrina Sokolova   PetscInt        mbs, nbs, nz, bs, i;
156*7072be85SIrina Sokolova   MatScalar       *aa;
157*7072be85SIrina Sokolova   PetscInt        *aj,*ai;
158*7072be85SIrina Sokolova   sparse_status_t stat;
159*7072be85SIrina Sokolova 
160*7072be85SIrina Sokolova   PetscFunctionBegin;
161*7072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
162*7072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
163*7072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
164*7072be85SIrina Sokolova     stat = mkl_sparse_destroy(baijmkl->bsrA);
165*7072be85SIrina Sokolova     if (stat != SPARSE_STATUS_SUCCESS) {
166*7072be85SIrina Sokolova       PetscFunctionReturn(PETSC_ERR_LIB);
167*7072be85SIrina Sokolova     }
168*7072be85SIrina Sokolova   }
169*7072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
170*7072be85SIrina Sokolova 
171*7072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
172*7072be85SIrina Sokolova   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
173*7072be85SIrina Sokolova   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
174*7072be85SIrina Sokolova   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
175*7072be85SIrina Sokolova   mbs = a->mbs;
176*7072be85SIrina Sokolova   nbs = a->nbs;
177*7072be85SIrina Sokolova   nz  = a->nz;
178*7072be85SIrina Sokolova   bs  = A->rmap->bs;
179*7072be85SIrina Sokolova   aa  = a->a;
180*7072be85SIrina Sokolova 
181*7072be85SIrina Sokolova   if (nz) {
182*7072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
183*7072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
184*7072be85SIrina Sokolova #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
185*7072be85SIrina Sokolova     aj   = a->j;
186*7072be85SIrina Sokolova     ai   = a->i;
187*7072be85SIrina Sokolova     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
188*7072be85SIrina Sokolova #else
189*7072be85SIrina Sokolova     ierr = PetscMalloc1(mbs+1,&ai);CHKERRQ(ierr);
190*7072be85SIrina Sokolova     ierr = PetscMalloc1(nz,&aj);CHKERRQ(ierr);
191*7072be85SIrina Sokolova     for (i=0;i<mbs+1;i++)
192*7072be85SIrina Sokolova 	ai[i]=a->i[i]+1;
193*7072be85SIrina Sokolova     for (i=0;i<nz;i++)
194*7072be85SIrina Sokolova 	aj[i]=a->j[i]+1;
195*7072be85SIrina Sokolova     aa   = a->a;
196*7072be85SIrina Sokolova     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);
197*7072be85SIrina Sokolova     baijmkl->ai1 = ai;
198*7072be85SIrina Sokolova     baijmkl->aj1 = aj;
199*7072be85SIrina Sokolova #endif
200*7072be85SIrina Sokolova     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);
201*7072be85SIrina Sokolova     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);
202*7072be85SIrina Sokolova     stat = mkl_sparse_optimize(baijmkl->bsrA);
203*7072be85SIrina Sokolova     if (stat != SPARSE_STATUS_SUCCESS) {
204*7072be85SIrina Sokolova       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle/complete mkl_sparse_optimize");
205*7072be85SIrina Sokolova       PetscFunctionReturn(PETSC_ERR_LIB);
206*7072be85SIrina Sokolova     }
207*7072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
208*7072be85SIrina Sokolova   }
209*7072be85SIrina Sokolova 
210*7072be85SIrina Sokolova   PetscFunctionReturn(0);
211*7072be85SIrina Sokolova }
212*7072be85SIrina Sokolova 
213*7072be85SIrina Sokolova PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
214*7072be85SIrina Sokolova {
215*7072be85SIrina Sokolova   PetscErrorCode ierr;
216*7072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
217*7072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
218*7072be85SIrina Sokolova 
219*7072be85SIrina Sokolova   PetscFunctionBegin;
220*7072be85SIrina Sokolova   ierr = MatDuplicate_SeqBAIJ(A,op,M);CHKERRQ(ierr);
221*7072be85SIrina Sokolova   baijmkl      = (Mat_SeqBAIJMKL*) A->spptr;
222*7072be85SIrina Sokolova   baijmkl_dest = (Mat_SeqBAIJMKL*) (*M)->spptr;
223*7072be85SIrina Sokolova   ierr = PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));CHKERRQ(ierr);
224*7072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
225*7072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
226*7072be85SIrina Sokolova   PetscFunctionReturn(0);
227*7072be85SIrina Sokolova }
228*7072be85SIrina Sokolova 
229*7072be85SIrina Sokolova PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
230*7072be85SIrina Sokolova {
231*7072be85SIrina Sokolova   PetscErrorCode  ierr;
232*7072be85SIrina Sokolova   PetscFunctionBegin;
233*7072be85SIrina Sokolova   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
234*7072be85SIrina Sokolova 
235*7072be85SIrina Sokolova   ierr = MatAssemblyEnd_SeqBAIJ(A, mode);CHKERRQ(ierr);
236*7072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
237*7072be85SIrina Sokolova 
238*7072be85SIrina Sokolova   PetscFunctionReturn(0);
239*7072be85SIrina Sokolova }
240*7072be85SIrina Sokolova 
241*7072be85SIrina Sokolova PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
242*7072be85SIrina Sokolova {
243*7072be85SIrina Sokolova   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244*7072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
245*7072be85SIrina Sokolova   const PetscScalar  *x;
246*7072be85SIrina Sokolova   PetscScalar        *y;
247*7072be85SIrina Sokolova   PetscErrorCode     ierr;
248*7072be85SIrina Sokolova   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
249*7072be85SIrina Sokolova 
250*7072be85SIrina Sokolova   PetscFunctionBegin;
251*7072be85SIrina Sokolova 
252*7072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
253*7072be85SIrina Sokolova   if(!a->nz) {
254*7072be85SIrina Sokolova     PetscInt i;
255*7072be85SIrina Sokolova     PetscInt m=a->mbs*A->rmap->bs;
256*7072be85SIrina Sokolova     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
257*7072be85SIrina Sokolova     for (i=0; i<m; i++) {
258*7072be85SIrina Sokolova       y[i] = 0.0;
259*7072be85SIrina Sokolova     }
260*7072be85SIrina Sokolova     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
261*7072be85SIrina Sokolova     PetscFunctionReturn(0);
262*7072be85SIrina Sokolova   }
263*7072be85SIrina Sokolova 
264*7072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
265*7072be85SIrina Sokolova   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
266*7072be85SIrina Sokolova 
267*7072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
268*7072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
269*7072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
270*7072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
271*7072be85SIrina Sokolova     MatSeqBAIJMKL_create_mkl_handle(A);
272*7072be85SIrina Sokolova   }
273*7072be85SIrina Sokolova 
274*7072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
275*7072be85SIrina Sokolova   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
276*7072be85SIrina Sokolova 
277*7072be85SIrina Sokolova   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
278*7072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
279*7072be85SIrina Sokolova   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
280*7072be85SIrina Sokolova   if (stat != SPARSE_STATUS_SUCCESS) {
281*7072be85SIrina Sokolova     PetscFunctionReturn(PETSC_ERR_LIB);
282*7072be85SIrina Sokolova   }
283*7072be85SIrina Sokolova   PetscFunctionReturn(0);
284*7072be85SIrina Sokolova }
285*7072be85SIrina Sokolova 
286*7072be85SIrina Sokolova PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
287*7072be85SIrina Sokolova {
288*7072be85SIrina Sokolova   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
289*7072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
290*7072be85SIrina Sokolova   const PetscScalar *x;
291*7072be85SIrina Sokolova   PetscScalar       *y;
292*7072be85SIrina Sokolova   PetscErrorCode    ierr;
293*7072be85SIrina Sokolova   sparse_status_t   stat;
294*7072be85SIrina Sokolova 
295*7072be85SIrina Sokolova   PetscFunctionBegin;
296*7072be85SIrina Sokolova 
297*7072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
298*7072be85SIrina Sokolova   if(!a->nz) {
299*7072be85SIrina Sokolova     PetscInt i;
300*7072be85SIrina Sokolova     PetscInt n=a->nbs;
301*7072be85SIrina Sokolova     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
302*7072be85SIrina Sokolova     for (i=0; i<n; i++) {
303*7072be85SIrina Sokolova       y[i] = 0.0;
304*7072be85SIrina Sokolova     }
305*7072be85SIrina Sokolova     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
306*7072be85SIrina Sokolova     PetscFunctionReturn(0);
307*7072be85SIrina Sokolova   }
308*7072be85SIrina Sokolova 
309*7072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
310*7072be85SIrina Sokolova   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
311*7072be85SIrina Sokolova 
312*7072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
313*7072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
314*7072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
315*7072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
316*7072be85SIrina Sokolova     MatSeqBAIJMKL_create_mkl_handle(A);
317*7072be85SIrina Sokolova   }
318*7072be85SIrina Sokolova 
319*7072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
320*7072be85SIrina Sokolova   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);
321*7072be85SIrina Sokolova 
322*7072be85SIrina Sokolova   ierr = PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
323*7072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
324*7072be85SIrina Sokolova   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
325*7072be85SIrina Sokolova   if (stat != SPARSE_STATUS_SUCCESS) {
326*7072be85SIrina Sokolova     PetscFunctionReturn(PETSC_ERR_LIB);
327*7072be85SIrina Sokolova   }
328*7072be85SIrina Sokolova   PetscFunctionReturn(0);
329*7072be85SIrina Sokolova }
330*7072be85SIrina Sokolova 
331*7072be85SIrina Sokolova PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
332*7072be85SIrina Sokolova {
333*7072be85SIrina Sokolova   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334*7072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
335*7072be85SIrina Sokolova   const PetscScalar  *x;
336*7072be85SIrina Sokolova   PetscScalar        *y,*z;
337*7072be85SIrina Sokolova   PetscErrorCode     ierr;
338*7072be85SIrina Sokolova   PetscInt           m=a->mbs*A->rmap->bs;
339*7072be85SIrina Sokolova   PetscInt           i;
340*7072be85SIrina Sokolova 
341*7072be85SIrina Sokolova   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
342*7072be85SIrina Sokolova 
343*7072be85SIrina Sokolova   PetscFunctionBegin;
344*7072be85SIrina Sokolova 
345*7072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
346*7072be85SIrina Sokolova   if(!a->nz) {
347*7072be85SIrina Sokolova     PetscInt i;
348*7072be85SIrina Sokolova     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
349*7072be85SIrina Sokolova     for (i=0; i<m; i++) {
350*7072be85SIrina Sokolova       z[i] = y[i];
351*7072be85SIrina Sokolova     }
352*7072be85SIrina Sokolova     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
353*7072be85SIrina Sokolova     PetscFunctionReturn(0);
354*7072be85SIrina Sokolova   }
355*7072be85SIrina Sokolova 
356*7072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
357*7072be85SIrina Sokolova   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
358*7072be85SIrina Sokolova 
359*7072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
360*7072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
361*7072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
362*7072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
363*7072be85SIrina Sokolova     MatSeqBAIJMKL_create_mkl_handle(A);
364*7072be85SIrina Sokolova   }
365*7072be85SIrina Sokolova 
366*7072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
367*7072be85SIrina Sokolova   if (zz == yy) {
368*7072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
369*7072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
370*7072be85SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
371*7072be85SIrina Sokolova   } else {
372*7072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
373*7072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
374*7072be85SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
375*7072be85SIrina Sokolova     for (i=0; i<m; i++) {
376*7072be85SIrina Sokolova       z[i] += y[i];
377*7072be85SIrina Sokolova     }
378*7072be85SIrina Sokolova   }
379*7072be85SIrina Sokolova 
380*7072be85SIrina Sokolova   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
381*7072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
382*7072be85SIrina Sokolova   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
383*7072be85SIrina Sokolova   if (stat != SPARSE_STATUS_SUCCESS) {
384*7072be85SIrina Sokolova     PetscFunctionReturn(PETSC_ERR_LIB);
385*7072be85SIrina Sokolova   }
386*7072be85SIrina Sokolova   PetscFunctionReturn(0);
387*7072be85SIrina Sokolova }
388*7072be85SIrina Sokolova 
389*7072be85SIrina Sokolova PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
390*7072be85SIrina Sokolova {
391*7072be85SIrina Sokolova   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
392*7072be85SIrina Sokolova   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
393*7072be85SIrina Sokolova   const PetscScalar *x;
394*7072be85SIrina Sokolova   PetscScalar       *y,*z;
395*7072be85SIrina Sokolova   PetscErrorCode    ierr;
396*7072be85SIrina Sokolova   PetscInt          n=a->nbs*A->rmap->bs;
397*7072be85SIrina Sokolova   PetscInt          i;
398*7072be85SIrina Sokolova 
399*7072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
400*7072be85SIrina Sokolova   sparse_status_t stat = SPARSE_STATUS_SUCCESS;
401*7072be85SIrina Sokolova 
402*7072be85SIrina Sokolova   PetscFunctionBegin;
403*7072be85SIrina Sokolova 
404*7072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
405*7072be85SIrina Sokolova   if(!a->nz) {
406*7072be85SIrina Sokolova     PetscInt i;
407*7072be85SIrina Sokolova     ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
408*7072be85SIrina Sokolova     for (i=0; i<n; i++) {
409*7072be85SIrina Sokolova       z[i] = y[i];
410*7072be85SIrina Sokolova     }
411*7072be85SIrina Sokolova     ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
412*7072be85SIrina Sokolova     PetscFunctionReturn(0);
413*7072be85SIrina Sokolova   }
414*7072be85SIrina Sokolova 
415*7072be85SIrina Sokolova   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
416*7072be85SIrina Sokolova   ierr = VecGetArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
417*7072be85SIrina Sokolova 
418*7072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
419*7072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
420*7072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
421*7072be85SIrina Sokolova   if (!baijmkl->sparse_optimized) {
422*7072be85SIrina Sokolova     MatSeqBAIJMKL_create_mkl_handle(A);
423*7072be85SIrina Sokolova   }
424*7072be85SIrina Sokolova 
425*7072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
426*7072be85SIrina Sokolova   if (zz == yy) {
427*7072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
428*7072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
429*7072be85SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);
430*7072be85SIrina Sokolova   } else {
431*7072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
432*7072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
433*7072be85SIrina Sokolova     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);
434*7072be85SIrina Sokolova     for (i=0; i<n; i++) {
435*7072be85SIrina Sokolova       z[i] += y[i];
436*7072be85SIrina Sokolova     }
437*7072be85SIrina Sokolova   }
438*7072be85SIrina Sokolova 
439*7072be85SIrina Sokolova   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
440*7072be85SIrina Sokolova   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
441*7072be85SIrina Sokolova   ierr = VecRestoreArrayPair(yy,zz,&y,&z);CHKERRQ(ierr);
442*7072be85SIrina Sokolova   if (stat != SPARSE_STATUS_SUCCESS) {
443*7072be85SIrina Sokolova     PetscFunctionReturn(PETSC_ERR_LIB);
444*7072be85SIrina Sokolova   }
445*7072be85SIrina Sokolova   PetscFunctionReturn(0);
446*7072be85SIrina Sokolova }
447*7072be85SIrina Sokolova 
448*7072be85SIrina Sokolova PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
449*7072be85SIrina Sokolova {
450*7072be85SIrina Sokolova   PetscErrorCode ierr;
451*7072be85SIrina Sokolova 
452*7072be85SIrina Sokolova   PetscFunctionBegin;
453*7072be85SIrina Sokolova   ierr = MatScale_SeqBAIJ(inA,alpha);CHKERRQ(ierr);
454*7072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(inA);CHKERRQ(ierr);
455*7072be85SIrina Sokolova   PetscFunctionReturn(0);
456*7072be85SIrina Sokolova }
457*7072be85SIrina Sokolova 
458*7072be85SIrina Sokolova PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
459*7072be85SIrina Sokolova {
460*7072be85SIrina Sokolova   PetscErrorCode ierr;
461*7072be85SIrina Sokolova 
462*7072be85SIrina Sokolova   PetscFunctionBegin;
463*7072be85SIrina Sokolova   ierr = MatDiagonalScale_SeqBAIJ(A,ll,rr);CHKERRQ(ierr);
464*7072be85SIrina Sokolova   ierr = MatSeqBAIJMKL_create_mkl_handle(A);CHKERRQ(ierr);
465*7072be85SIrina Sokolova   PetscFunctionReturn(0);
466*7072be85SIrina Sokolova }
467*7072be85SIrina Sokolova 
468*7072be85SIrina Sokolova PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
469*7072be85SIrina Sokolova {
470*7072be85SIrina Sokolova   PetscErrorCode ierr;
471*7072be85SIrina Sokolova 
472*7072be85SIrina Sokolova   PetscFunctionBegin;
473*7072be85SIrina Sokolova   ierr = MatAXPY_SeqBAIJ(Y,a,X,str);CHKERRQ(ierr);
474*7072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
475*7072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
476*7072be85SIrina Sokolova     ierr = MatSeqBAIJMKL_create_mkl_handle(Y);CHKERRQ(ierr);
477*7072be85SIrina Sokolova   }
478*7072be85SIrina Sokolova   PetscFunctionReturn(0);
479*7072be85SIrina Sokolova }
480*7072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
481*7072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
482*7072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
483*7072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
484*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
485*7072be85SIrina Sokolova {
486*7072be85SIrina Sokolova   PetscErrorCode ierr;
487*7072be85SIrina Sokolova   Mat            B = *newmat;
488*7072be85SIrina Sokolova   Mat_SeqBAIJMKL  *baijmkl;
489*7072be85SIrina Sokolova   PetscBool      sametype;
490*7072be85SIrina Sokolova 
491*7072be85SIrina Sokolova   PetscFunctionBegin;
492*7072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
493*7072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
494*7072be85SIrina Sokolova   }
495*7072be85SIrina Sokolova 
496*7072be85SIrina Sokolova   ierr = PetscObjectTypeCompare((PetscObject)A,type,&sametype);CHKERRQ(ierr);
497*7072be85SIrina Sokolova   if (sametype) PetscFunctionReturn(0);
498*7072be85SIrina Sokolova 
499*7072be85SIrina Sokolova   ierr     = PetscNewLog(B,&baijmkl);CHKERRQ(ierr);
500*7072be85SIrina Sokolova   B->spptr = (void*) baijmkl;
501*7072be85SIrina Sokolova 
502*7072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
503*7072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
504*7072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
505*7072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
506*7072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJMKL;
507*7072be85SIrina Sokolova   B->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
508*7072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
509*7072be85SIrina Sokolova   B->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
510*7072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
511*7072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJMKL;
512*7072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
513*7072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJMKL;
514*7072be85SIrina Sokolova 
515*7072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
516*7072be85SIrina Sokolova 
517*7072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);CHKERRQ(ierr);
518*7072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);CHKERRQ(ierr);
519*7072be85SIrina Sokolova 
520*7072be85SIrina Sokolova   ierr    = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);CHKERRQ(ierr);
521*7072be85SIrina Sokolova   *newmat = B;
522*7072be85SIrina Sokolova   PetscFunctionReturn(0);
523*7072be85SIrina Sokolova }
524*7072be85SIrina Sokolova #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
525*7072be85SIrina Sokolova /*@C
526*7072be85SIrina Sokolova    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
527*7072be85SIrina Sokolova    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
528*7072be85SIrina Sokolova    routines from Intel MKL whenever possible.
529*7072be85SIrina Sokolova    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
530*7072be85SIrina Sokolova    operations are currently supported.
531*7072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
532*7072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
533*7072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
534*7072be85SIrina Sokolova 
535*7072be85SIrina Sokolova    Input Parameters:
536*7072be85SIrina Sokolova +  comm - MPI communicator, set to PETSC_COMM_SELF
537*7072be85SIrina Sokolova .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
538*7072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
539*7072be85SIrina Sokolova .  m - number of rows
540*7072be85SIrina Sokolova .  n - number of columns
541*7072be85SIrina Sokolova .  nz - number of nonzero blocks  per block row (same for all rows)
542*7072be85SIrina Sokolova -  nnz - array containing the number of nonzero blocks in the various block rows
543*7072be85SIrina Sokolova          (possibly different for each block row) or NULL
544*7072be85SIrina Sokolova 
545*7072be85SIrina Sokolova 
546*7072be85SIrina Sokolova    Output Parameter:
547*7072be85SIrina Sokolova .  A - the matrix
548*7072be85SIrina Sokolova 
549*7072be85SIrina Sokolova    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
550*7072be85SIrina Sokolova    MatXXXXSetPreallocation() paradgm instead of this routine directly.
551*7072be85SIrina Sokolova    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
552*7072be85SIrina Sokolova 
553*7072be85SIrina Sokolova    Options Database Keys:
554*7072be85SIrina Sokolova .   -mat_no_unroll - uses code that does not unroll the loops in the
555*7072be85SIrina Sokolova                      block calculations (much slower)
556*7072be85SIrina Sokolova .    -mat_block_size - size of the blocks to use
557*7072be85SIrina Sokolova 
558*7072be85SIrina Sokolova    Level: intermediate
559*7072be85SIrina Sokolova 
560*7072be85SIrina Sokolova    Notes:
561*7072be85SIrina Sokolova    The number of rows and columns must be divisible by blocksize.
562*7072be85SIrina Sokolova 
563*7072be85SIrina Sokolova    If the nnz parameter is given then the nz parameter is ignored
564*7072be85SIrina Sokolova 
565*7072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
566*7072be85SIrina Sokolova 
567*7072be85SIrina Sokolova    The block AIJ format is fully compatible with standard Fortran 77
568*7072be85SIrina Sokolova    storage.  That is, the stored row and column indices can begin at
569*7072be85SIrina Sokolova    either one (as in Fortran) or zero.  See the users' manual for details.
570*7072be85SIrina Sokolova 
571*7072be85SIrina Sokolova    Specify the preallocated storage with either nz or nnz (not both).
572*7072be85SIrina Sokolova    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
573*7072be85SIrina Sokolova    allocation.  See Users-Manual: ch_mat for details.
574*7072be85SIrina Sokolova    matrices.
575*7072be85SIrina Sokolova 
576*7072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
577*7072be85SIrina Sokolova 
578*7072be85SIrina Sokolova @*/
579*7072be85SIrina Sokolova PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
580*7072be85SIrina Sokolova {
581*7072be85SIrina Sokolova   PetscErrorCode ierr;
582*7072be85SIrina Sokolova 
583*7072be85SIrina Sokolova   PetscFunctionBegin;
584*7072be85SIrina Sokolova   ierr = MatCreate(comm,A);CHKERRQ(ierr);
585*7072be85SIrina Sokolova   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
586*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
587*7072be85SIrina Sokolova   ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
588*7072be85SIrina Sokolova   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
589*7072be85SIrina Sokolova #else
590*7072be85SIrina Sokolova   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
591*7072be85SIrina Sokolova   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
592*7072be85SIrina Sokolova   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
593*7072be85SIrina Sokolova #endif
594*7072be85SIrina Sokolova   PetscFunctionReturn(0);
595*7072be85SIrina Sokolova }
596*7072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
597*7072be85SIrina Sokolova {
598*7072be85SIrina Sokolova   PetscErrorCode ierr;
599*7072be85SIrina Sokolova 
600*7072be85SIrina Sokolova   PetscFunctionBegin;
601*7072be85SIrina Sokolova   ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
602*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
603*7072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
604*7072be85SIrina Sokolova #else
605*7072be85SIrina Sokolova   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
606*7072be85SIrina Sokolova #endif
607*7072be85SIrina Sokolova   PetscFunctionReturn(0);
608*7072be85SIrina Sokolova }
609