xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision e4094ef18e7e53fda86cf35f3a47fda48a8e77d8)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>
2 
3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
4 
5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
6 {
7   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
8 
9   PetscFunctionBegin;
10   PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
11   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
12   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
13   PetscFunctionReturn(PETSC_SUCCESS);
14 }
15 
16 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
17 {
18   Mat B = *newmat;
19 
20   PetscFunctionBegin;
21   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
22 
23   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
24   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
25   *newmat = B;
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 /*@C
30   MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row).
31 
32   Collective
33 
34   Input Parameters:
35 + comm  - MPI communicator
36 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
37           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
38 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
39            This value should be the same as the local size used in creating the
40            y vector for the matrix-vector product y = Ax.
41 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
42            This value should be the same as the local size used in creating the
43            x vector for the matrix-vector product y = Ax.
44 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
45 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
46 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
47            submatrix  (same for all local rows)
48 . d_nnz - array containing the number of nonzero blocks in the various block rows
49            of the diagonal portion of the local submatrix (possibly different for each block
50            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry
51            and set it even if it is zero.
52 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
53            submatrix (same for all local rows).
54 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
55            off-diagonal portion of the local submatrix (possibly different for
56            each block row) or `NULL`.
57 
58   Output Parameter:
59 . A - the matrix
60 
61   Options Database Keys:
62 + -mat_block_size            - size of the blocks to use
63 - -mat_use_hash_table <fact> - set hash table factor
64 
65   Level: intermediate
66 
67   Notes:
68   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
69   MatXXXXSetPreallocation() paradigm instead of this routine directly.
70   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
71 
72   This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
73   routines from Intel MKL whenever possible.
74   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
75   operations are currently supported.
76   If the installed version of MKL supports the "SpMV2" sparse
77   inspector-executor routines, then those are used by default.
78   Default PETSc kernels are used otherwise.
79   For good matrix assembly performance the user should preallocate the matrix
80   storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
81   By setting these parameters accurately, performance can be increased by more
82   than a factor of 50.
83 
84   If the *_nnz parameter is given then the *_nz parameter is ignored
85 
86   A nonzero block is any block that has 1 or more nonzeros in it
87 
88   The user MUST specify either the local or global matrix dimensions
89   (possibly both).
90 
91   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
92   then it must be used on all processors that share the object for that argument.
93 
94   Storage Information:
95   For a square global matrix we define each processor's diagonal portion
96   to be its local rows and the corresponding columns (a square submatrix);
97   each processor's off-diagonal portion encompasses the remainder of the
98   local matrix (a rectangular submatrix).
99 
100   The user can specify preallocated storage for the diagonal part of
101   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
102   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
103   memory allocation.  Likewise, specify preallocated storage for the
104   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
105 
106   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
107   the figure below we depict these three local rows and all columns (0-11).
108 
109 .vb
110            0 1 2 3 4 5 6 7 8 9 10 11
111           --------------------------
112    row 3  |o o o d d d o o o o  o  o
113    row 4  |o o o d d d o o o o  o  o
114    row 5  |o o o d d d o o o o  o  o
115           --------------------------
116 .ve
117 
118   Thus, any entries in the d locations are stored in the d (diagonal)
119   submatrix, and any entries in the o locations are stored in the
120   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
121   stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
122 
123   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
124   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
125   In general, for PDE problems in which most nonzeros are near the diagonal,
126   one expects `d_nz` >> `o_nz`.
127 
128 .seealso: [](ch_matrices), `Mat`, `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
129 @*/
130 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
131 {
132   PetscMPIInt size;
133 
134   PetscFunctionBegin;
135   PetscCall(MatCreate(comm, A));
136   PetscCall(MatSetSizes(*A, m, n, M, N));
137   PetscCallMPI(MPI_Comm_size(comm, &size));
138   if (size > 1) {
139     PetscCall(MatSetType(*A, MATMPIBAIJMKL));
140     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
141   } else {
142     PetscCall(MatSetType(*A, MATSEQBAIJMKL));
143     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
144   }
145   PetscFunctionReturn(PETSC_SUCCESS);
146 }
147 
148 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
149 {
150   PetscFunctionBegin;
151   PetscCall(MatSetType(A, MATMPIBAIJ));
152   PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 /*MC
157    MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.
158 
159    This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
160    and `MATMPIBAIJMKL` otherwise.  As a result, for single process communicators,
161   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
162   for communicators controlling multiple processes.  It is recommended that you call both of
163   the above preallocation routines for simplicity.
164 
165    Options Database Key:
166 . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
167 
168   Level: beginner
169 
170 .seealso: [](ch_matrices), `Mat`, `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
171 M*/
172