#include <../src/mat/impls/baij/mpi/mpibaij.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Perform the usual MPIBAIJ preallocation, then convert the local diagonal (b->A)
     and off-diagonal (b->B) blocks to the MKL-accelerated sequential format. */
  ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B = *newmat;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* Change the type name and install the MKL-specific preallocation routine;
     the conversion of the local blocks happens during preallocation. */
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
#endif
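
/* A minimal usage sketch (hypothetical user code, not part of this file): an existing
   MPIBAIJ matrix can be switched to the MKL variant in place with MatConvert();
   comm, bs, the size arguments, and the preallocation counts are placeholders.

     Mat A;
     MatCreateBAIJ(comm,bs,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);
     (assemble A with MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd())
     MatConvert(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);
*/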
/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).
   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.
   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs().
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set the hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

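   Example usage, a minimal sketch (bs, M, N, d_nz, and o_nz are placeholder values):

.vb
   Mat A;
   MatCreateBAIJMKL(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,NULL,o_nz,NULL,&A);
.ve
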
   Level: intermediate

.seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJMKL(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
#else
    /* Fall back to the default PETSc kernels when MKL lacks the sparse inspector-executor routines. */
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL; using default PETSc routines. Please use MKL version 11.3 or later.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL; using default PETSc routines. Please use MKL version 11.3 or later.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Build a standard MPIBAIJ matrix, then convert it in place to the MKL variant. */
  ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
#else
  ierr = PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL; using default PETSc routines. Please use MKL version 11.3 or later.\n");CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
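
/* A minimal sketch of how this constructor is reached (hypothetical user code;
   comm, the sizes, and the preallocation counts are placeholders):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIBAIJMKL);                            (dispatches here)
     MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
*/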

/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type baijmkl - sets the matrix type to "baijmkl" during a call to MatSetFromOptions()
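
   For example, a program that calls MatSetFromOptions() on its matrix can select
   this type at run time (the program name is a placeholder):

.vb
   ./myprogram -mat_type baijmkl
.ve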

   Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/