xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision 7072be85568e48e0f9849a83abce647ecf3f07b4)
1*7072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h>
2*7072be85SIrina Sokolova 
3*7072be85SIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
4*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
5*7072be85SIrina Sokolova 
6*7072be85SIrina Sokolova PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
7*7072be85SIrina Sokolova                                                    //(Mat B,            PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
8*7072be85SIrina Sokolova {
9*7072be85SIrina Sokolova   Mat_MPIBAIJ     *b = (Mat_MPIBAIJ*)B->data;
10*7072be85SIrina Sokolova   PetscErrorCode ierr;
11*7072be85SIrina Sokolova 
12*7072be85SIrina Sokolova   PetscFunctionBegin;
13*7072be85SIrina Sokolova   ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
14*7072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A);CHKERRQ(ierr);
15*7072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B);CHKERRQ(ierr);
16*7072be85SIrina Sokolova   PetscFunctionReturn(0);
17*7072be85SIrina Sokolova }
18*7072be85SIrina Sokolova 
19*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
20*7072be85SIrina Sokolova {
21*7072be85SIrina Sokolova   PetscErrorCode ierr;
22*7072be85SIrina Sokolova   Mat            B = *newmat;
23*7072be85SIrina Sokolova 
24*7072be85SIrina Sokolova   PetscFunctionBegin;
25*7072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
26*7072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
27*7072be85SIrina Sokolova   }
28*7072be85SIrina Sokolova 
29*7072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr);
30*7072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
31*7072be85SIrina Sokolova   *newmat = B;
32*7072be85SIrina Sokolova   PetscFunctionReturn(0);
33*7072be85SIrina Sokolova }
34*7072be85SIrina Sokolova #endif
35*7072be85SIrina Sokolova /*@C
36*7072be85SIrina Sokolova    MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
37*7072be85SIrina Sokolova    (block compressed row).
38*7072be85SIrina Sokolova    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
39*7072be85SIrina Sokolova    routines from Intel MKL whenever possible.
40*7072be85SIrina Sokolova    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
41*7072be85SIrina Sokolova    operations are currently supported.
42*7072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
43*7072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
44*7072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
45*7072be85SIrina Sokolova    For good matrix assembly performance the user should preallocate the matrix
46*7072be85SIrina Sokolova    storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
47*7072be85SIrina Sokolova    By setting these parameters accurately, performance can be increased by more
48*7072be85SIrina Sokolova    than a factor of 50.
49*7072be85SIrina Sokolova 
50*7072be85SIrina Sokolova    Collective on MPI_Comm
51*7072be85SIrina Sokolova 
52*7072be85SIrina Sokolova    Input Parameters:
53*7072be85SIrina Sokolova +  comm - MPI communicator
54*7072be85SIrina Sokolova .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
55*7072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
56*7072be85SIrina Sokolova .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
57*7072be85SIrina Sokolova            This value should be the same as the local size used in creating the
58*7072be85SIrina Sokolova            y vector for the matrix-vector product y = Ax.
59*7072be85SIrina Sokolova .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
60*7072be85SIrina Sokolova            This value should be the same as the local size used in creating the
61*7072be85SIrina Sokolova            x vector for the matrix-vector product y = Ax.
62*7072be85SIrina Sokolova .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
63*7072be85SIrina Sokolova .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
64*7072be85SIrina Sokolova .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
65*7072be85SIrina Sokolova            submatrix  (same for all local rows)
66*7072be85SIrina Sokolova .  d_nnz - array containing the number of nonzero blocks in the various block rows
67*7072be85SIrina Sokolova            of the in diagonal portion of the local (possibly different for each block
68*7072be85SIrina Sokolova            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
69*7072be85SIrina Sokolova            and set it even if it is zero.
70*7072be85SIrina Sokolova .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
71*7072be85SIrina Sokolova            submatrix (same for all local rows).
72*7072be85SIrina Sokolova -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
73*7072be85SIrina Sokolova            off-diagonal portion of the local submatrix (possibly different for
74*7072be85SIrina Sokolova            each block row) or NULL.
75*7072be85SIrina Sokolova 
76*7072be85SIrina Sokolova    Output Parameter:
77*7072be85SIrina Sokolova .  A - the matrix
78*7072be85SIrina Sokolova 
79*7072be85SIrina Sokolova    Options Database Keys:
80*7072be85SIrina Sokolova +   -mat_block_size - size of the blocks to use
81*7072be85SIrina Sokolova -   -mat_use_hash_table <fact>
82*7072be85SIrina Sokolova 
83*7072be85SIrina Sokolova    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
84*7072be85SIrina Sokolova    MatXXXXSetPreallocation() paradgm instead of this routine directly.
85*7072be85SIrina Sokolova    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
86*7072be85SIrina Sokolova 
87*7072be85SIrina Sokolova    Notes:
88*7072be85SIrina Sokolova    If the *_nnz parameter is given then the *_nz parameter is ignored
89*7072be85SIrina Sokolova 
90*7072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
91*7072be85SIrina Sokolova 
92*7072be85SIrina Sokolova    The user MUST specify either the local or global matrix dimensions
93*7072be85SIrina Sokolova    (possibly both).
94*7072be85SIrina Sokolova 
95*7072be85SIrina Sokolova    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
96*7072be85SIrina Sokolova    than it must be used on all processors that share the object for that argument.
97*7072be85SIrina Sokolova 
98*7072be85SIrina Sokolova    Storage Information:
99*7072be85SIrina Sokolova    For a square global matrix we define each processor's diagonal portion
100*7072be85SIrina Sokolova    to be its local rows and the corresponding columns (a square submatrix);
101*7072be85SIrina Sokolova    each processor's off-diagonal portion encompasses the remainder of the
102*7072be85SIrina Sokolova    local matrix (a rectangular submatrix).
103*7072be85SIrina Sokolova 
104*7072be85SIrina Sokolova    The user can specify preallocated storage for the diagonal part of
105*7072be85SIrina Sokolova    the local submatrix with either d_nz or d_nnz (not both).  Set
106*7072be85SIrina Sokolova    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
107*7072be85SIrina Sokolova    memory allocation.  Likewise, specify preallocated storage for the
108*7072be85SIrina Sokolova    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
109*7072be85SIrina Sokolova 
110*7072be85SIrina Sokolova    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
111*7072be85SIrina Sokolova    the figure below we depict these three local rows and all columns (0-11).
112*7072be85SIrina Sokolova 
113*7072be85SIrina Sokolova .vb
114*7072be85SIrina Sokolova            0 1 2 3 4 5 6 7 8 9 10 11
115*7072be85SIrina Sokolova           --------------------------
116*7072be85SIrina Sokolova    row 3  |o o o d d d o o o o  o  o
117*7072be85SIrina Sokolova    row 4  |o o o d d d o o o o  o  o
118*7072be85SIrina Sokolova    row 5  |o o o d d d o o o o  o  o
119*7072be85SIrina Sokolova           --------------------------
120*7072be85SIrina Sokolova .ve
121*7072be85SIrina Sokolova 
122*7072be85SIrina Sokolova    Thus, any entries in the d locations are stored in the d (diagonal)
123*7072be85SIrina Sokolova    submatrix, and any entries in the o locations are stored in the
124*7072be85SIrina Sokolova    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
125*7072be85SIrina Sokolova    stored simply in the MATSEQBAIJMKL format for compressed row storage.
126*7072be85SIrina Sokolova 
127*7072be85SIrina Sokolova    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
128*7072be85SIrina Sokolova    and o_nz should indicate the number of block nonzeros per row in the o matrix.
129*7072be85SIrina Sokolova    In general, for PDE problems in which most nonzeros are near the diagonal,
130*7072be85SIrina Sokolova    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
131*7072be85SIrina Sokolova    or you will get TERRIBLE performance; see the users' manual chapter on
132*7072be85SIrina Sokolova    matrices.
133*7072be85SIrina Sokolova 
134*7072be85SIrina Sokolova    Level: intermediate
135*7072be85SIrina Sokolova 
136*7072be85SIrina Sokolova .keywords: matrix, block, aij, compressed row, sparse, parallel
137*7072be85SIrina Sokolova 
138*7072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
139*7072be85SIrina Sokolova @*/
140*7072be85SIrina Sokolova 
141*7072be85SIrina Sokolova PetscErrorCode  MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
142*7072be85SIrina Sokolova {
143*7072be85SIrina Sokolova   PetscErrorCode ierr;
144*7072be85SIrina Sokolova   PetscMPIInt    size;
145*7072be85SIrina Sokolova 
146*7072be85SIrina Sokolova   PetscFunctionBegin;
147*7072be85SIrina Sokolova   ierr = MatCreate(comm,A);CHKERRQ(ierr);
148*7072be85SIrina Sokolova   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
149*7072be85SIrina Sokolova   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
150*7072be85SIrina Sokolova   if (size > 1) {
151*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
152*7072be85SIrina Sokolova     ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
153*7072be85SIrina Sokolova #else
154*7072be85SIrina Sokolova     ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
155*7072be85SIrina Sokolova     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
156*7072be85SIrina Sokolova #endif
157*7072be85SIrina Sokolova     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
158*7072be85SIrina Sokolova   } else {
159*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
160*7072be85SIrina Sokolova     ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
161*7072be85SIrina Sokolova #else
162*7072be85SIrina Sokolova     ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
163*7072be85SIrina Sokolova     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
164*7072be85SIrina Sokolova #endif
165*7072be85SIrina Sokolova     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
166*7072be85SIrina Sokolova   }
167*7072be85SIrina Sokolova   PetscFunctionReturn(0);
168*7072be85SIrina Sokolova }
169*7072be85SIrina Sokolova 
170*7072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
171*7072be85SIrina Sokolova {
172*7072be85SIrina Sokolova   PetscErrorCode ierr;
173*7072be85SIrina Sokolova 
174*7072be85SIrina Sokolova   PetscFunctionBegin;
175*7072be85SIrina Sokolova   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
176*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
177*7072be85SIrina Sokolova   ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
178*7072be85SIrina Sokolova #else
179*7072be85SIrina Sokolova   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
180*7072be85SIrina Sokolova #endif
181*7072be85SIrina Sokolova   PetscFunctionReturn(0);
182*7072be85SIrina Sokolova }
183*7072be85SIrina Sokolova 
184*7072be85SIrina Sokolova /*MC
185*7072be85SIrina Sokolova    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
186*7072be85SIrina Sokolova 
187*7072be85SIrina Sokolova    This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
188*7072be85SIrina Sokolova    and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
189*7072be85SIrina Sokolova   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
190*7072be85SIrina Sokolova   for communicators controlling multiple processes.  It is recommended that you call both of
191*7072be85SIrina Sokolova   the above preallocation routines for simplicity.
192*7072be85SIrina Sokolova 
193*7072be85SIrina Sokolova    Options Database Keys:
194*7072be85SIrina Sokolova . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions()
195*7072be85SIrina Sokolova 
196*7072be85SIrina Sokolova   Level: beginner
197*7072be85SIrina Sokolova 
198*7072be85SIrina Sokolova .seealso: MatCreateMPIBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
199*7072be85SIrina Sokolova M*/
200*7072be85SIrina Sokolova 
201