xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>
2 
3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
4 
5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
6   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
7 
8   PetscFunctionBegin;
9   PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
10   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
11   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
12   PetscFunctionReturn(0);
13 }
14 
15 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) {
16   Mat B = *newmat;
17 
18   PetscFunctionBegin;
19   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
20 
21   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
22   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
23   *newmat = B;
24   PetscFunctionReturn(0);
25 }
26 
27 /*@C
28    MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format
29    (block compressed row).
30    This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
31    routines from Intel MKL whenever possible.
32    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
33    operations are currently supported.
34    If the installed version of MKL supports the "SpMV2" sparse
35    inspector-executor routines, then those are used by default.
36    Default PETSc kernels are used otherwise.
37    For good matrix assembly performance the user should preallocate the matrix
38    storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
39    By setting these parameters accurately, performance can be increased by more
40    than a factor of 50.
41 
42    Collective
43 
44    Input Parameters:
45 +  comm - MPI communicator
46 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
47           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
48 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
49            This value should be the same as the local size used in creating the
50            y vector for the matrix-vector product y = Ax.
51 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
52            This value should be the same as the local size used in creating the
53            x vector for the matrix-vector product y = Ax.
54 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
55 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
56 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
57            submatrix  (same for all local rows)
58 .  d_nnz - array containing the number of nonzero blocks in the various block rows
59           of the diagonal portion of the local submatrix (possibly different for each block
60            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
61            and set it even if it is zero.
62 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
63            submatrix (same for all local rows).
64 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
65            off-diagonal portion of the local submatrix (possibly different for
66            each block row) or NULL.
67 
68    Output Parameter:
69 .  A - the matrix
70 
71    Options Database Keys:
72 +   -mat_block_size - size of the blocks to use
73 -   -mat_use_hash_table <fact> - set hash table factor
74 
75    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
76    MatXXXXSetPreallocation() paradigm instead of this routine directly.
77    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
78 
79    Notes:
80    If the *_nnz parameter is given then the *_nz parameter is ignored
81 
82    A nonzero block is any block that has 1 or more nonzeros in it
83 
84    The user MUST specify either the local or global matrix dimensions
85    (possibly both).
86 
87    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
88    then it must be used on all processors that share the object for that argument.
89 
90    Storage Information:
91    For a square global matrix we define each processor's diagonal portion
92    to be its local rows and the corresponding columns (a square submatrix);
93    each processor's off-diagonal portion encompasses the remainder of the
94    local matrix (a rectangular submatrix).
95 
96    The user can specify preallocated storage for the diagonal part of
97    the local submatrix with either d_nz or d_nnz (not both).  Set
98    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
99    memory allocation.  Likewise, specify preallocated storage for the
100    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
101 
102    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
103    the figure below we depict these three local rows and all columns (0-11).
104 
105 .vb
106            0 1 2 3 4 5 6 7 8 9 10 11
107           --------------------------
108    row 3  |o o o d d d o o o o  o  o
109    row 4  |o o o d d d o o o o  o  o
110    row 5  |o o o d d d o o o o  o  o
111           --------------------------
112 .ve
113 
114    Thus, any entries in the d locations are stored in the d (diagonal)
115    submatrix, and any entries in the o locations are stored in the
116    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
117    stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
118 
119    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
120    and o_nz should indicate the number of block nonzeros per row in the o matrix.
121    In general, for PDE problems in which most nonzeros are near the diagonal,
122    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
123    or you will get TERRIBLE performance; see the users' manual chapter on
124    matrices.
125 
126    Level: intermediate
127 
128 .seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
129 @*/
130 
131 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
132   PetscMPIInt size;
133 
134   PetscFunctionBegin;
135   PetscCall(MatCreate(comm, A));
136   PetscCall(MatSetSizes(*A, m, n, M, N));
137   PetscCallMPI(MPI_Comm_size(comm, &size));
138   if (size > 1) {
139     PetscCall(MatSetType(*A, MATMPIBAIJMKL));
140     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
141   } else {
142     PetscCall(MatSetType(*A, MATSEQBAIJMKL));
143     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
144   }
145   PetscFunctionReturn(0);
146 }
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) {
149   PetscFunctionBegin;
150   PetscCall(MatSetType(A, MATMPIBAIJ));
151   PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
152   PetscFunctionReturn(0);
153 }
154 
155 /*MC
156    MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
159    and `MATMPIBAIJMKL` otherwise.  As a result, for single process communicators,
160   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Keys:
165 . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
170 M*/
171