xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision a623e290c7eaa252b385564179837fe27521fbac)
1*5d83a8b1SBarry Smith #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I   "petscmat.h"   I*/
27072be85SIrina Sokolova 
37072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
47072be85SIrina Sokolova 
MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)5d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
6d71ae5a4SJacob Faibussowitsch {
77072be85SIrina Sokolova   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
87072be85SIrina Sokolova 
97072be85SIrina Sokolova   PetscFunctionBegin;
109566063dSJacob Faibussowitsch   PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
119566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
129566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
147072be85SIrina Sokolova }
157072be85SIrina Sokolova 
MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)16d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
17d71ae5a4SJacob Faibussowitsch {
187072be85SIrina Sokolova   Mat B = *newmat;
197072be85SIrina Sokolova 
207072be85SIrina Sokolova   PetscFunctionBegin;
2148a46eb9SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
227072be85SIrina Sokolova 
239566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
257072be85SIrina Sokolova   *newmat = B;
263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
277072be85SIrina Sokolova }
28b9e7e5c1SBarry Smith 
292920cce0SJacob Faibussowitsch // PetscClangLinter pragma disable: -fdoc-section-header-unknown
30*5d83a8b1SBarry Smith /*@
3167be906fSBarry Smith   MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row).
327072be85SIrina Sokolova 
33d083f849SBarry Smith   Collective
347072be85SIrina Sokolova 
357072be85SIrina Sokolova   Input Parameters:
367072be85SIrina Sokolova + comm  - MPI communicator
3711a5261eSBarry Smith . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3811a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
392ef1f0ffSBarry Smith . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
407072be85SIrina Sokolova           This value should be the same as the local size used in creating the
417072be85SIrina Sokolova           y vector for the matrix-vector product y = Ax.
422ef1f0ffSBarry Smith . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
437072be85SIrina Sokolova           This value should be the same as the local size used in creating the
447072be85SIrina Sokolova           x vector for the matrix-vector product y = Ax.
452ef1f0ffSBarry Smith . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
462ef1f0ffSBarry Smith . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
477072be85SIrina Sokolova . d_nz  - number of nonzero blocks per block row in diagonal portion of local
487072be85SIrina Sokolova           submatrix  (same for all local rows)
497072be85SIrina Sokolova . d_nnz - array containing the number of nonzero blocks in the various block rows
507072be85SIrina Sokolova           of the in diagonal portion of the local (possibly different for each block
512ef1f0ffSBarry Smith           row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry
527072be85SIrina Sokolova           and set it even if it is zero.
537072be85SIrina Sokolova . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
547072be85SIrina Sokolova           submatrix (same for all local rows).
557072be85SIrina Sokolova - o_nnz - array containing the number of nonzero blocks in the various block rows of the
567072be85SIrina Sokolova           off-diagonal portion of the local submatrix (possibly different for
572ef1f0ffSBarry Smith           each block row) or `NULL`.
587072be85SIrina Sokolova 
597072be85SIrina Sokolova   Output Parameter:
607072be85SIrina Sokolova . A - the matrix
617072be85SIrina Sokolova 
627072be85SIrina Sokolova   Options Database Keys:
637072be85SIrina Sokolova + -mat_block_size            - size of the blocks to use
6467b8a455SSatish Balay - -mat_use_hash_table <fact> - set hash table factor
657072be85SIrina Sokolova 
662ef1f0ffSBarry Smith   Level: intermediate
672ef1f0ffSBarry Smith 
682ef1f0ffSBarry Smith   Notes:
6911a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
70f6f02116SRichard Tran Mills   MatXXXXSetPreallocation() paradigm instead of this routine directly.
7111a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
727072be85SIrina Sokolova 
7367be906fSBarry Smith   This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
7467be906fSBarry Smith   routines from Intel MKL whenever possible.
7567be906fSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
7667be906fSBarry Smith   operations are currently supported.
7767be906fSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
7867be906fSBarry Smith   inspector-executor routines, then those are used by default.
7967be906fSBarry Smith   Default PETSc kernels are used otherwise.
8067be906fSBarry Smith   For good matrix assembly performance the user should preallocate the matrix
8167be906fSBarry Smith   storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
8267be906fSBarry Smith   By setting these parameters accurately, performance can be increased by more
8367be906fSBarry Smith   than a factor of 50.
8467be906fSBarry Smith 
857072be85SIrina Sokolova   If the *_nnz parameter is given then the *_nz parameter is ignored
867072be85SIrina Sokolova 
877072be85SIrina Sokolova   A nonzero block is any block that as 1 or more nonzeros in it
887072be85SIrina Sokolova 
897072be85SIrina Sokolova   The user MUST specify either the local or global matrix dimensions
907072be85SIrina Sokolova   (possibly both).
917072be85SIrina Sokolova 
9211a5261eSBarry Smith   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
937072be85SIrina Sokolova   than it must be used on all processors that share the object for that argument.
947072be85SIrina Sokolova 
957072be85SIrina Sokolova   Storage Information:
967072be85SIrina Sokolova   For a square global matrix we define each processor's diagonal portion
977072be85SIrina Sokolova   to be its local rows and the corresponding columns (a square submatrix);
987072be85SIrina Sokolova   each processor's off-diagonal portion encompasses the remainder of the
997072be85SIrina Sokolova   local matrix (a rectangular submatrix).
1007072be85SIrina Sokolova 
1017072be85SIrina Sokolova   The user can specify preallocated storage for the diagonal part of
10267be906fSBarry Smith   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
10367be906fSBarry Smith   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
1047072be85SIrina Sokolova   memory allocation.  Likewise, specify preallocated storage for the
10567be906fSBarry Smith   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
1067072be85SIrina Sokolova 
1077072be85SIrina Sokolova   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1087072be85SIrina Sokolova   the figure below we depict these three local rows and all columns (0-11).
1097072be85SIrina Sokolova 
1107072be85SIrina Sokolova .vb
1117072be85SIrina Sokolova            0 1 2 3 4 5 6 7 8 9 10 11
1127072be85SIrina Sokolova           --------------------------
1137072be85SIrina Sokolova    row 3  |o o o d d d o o o o  o  o
1147072be85SIrina Sokolova    row 4  |o o o d d d o o o o  o  o
1157072be85SIrina Sokolova    row 5  |o o o d d d o o o o  o  o
1167072be85SIrina Sokolova           --------------------------
1177072be85SIrina Sokolova .ve
1187072be85SIrina Sokolova 
1197072be85SIrina Sokolova   Thus, any entries in the d locations are stored in the d (diagonal)
1207072be85SIrina Sokolova   submatrix, and any entries in the o locations are stored in the
1217072be85SIrina Sokolova   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
12211a5261eSBarry Smith   stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
1237072be85SIrina Sokolova 
12467be906fSBarry Smith   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
12567be906fSBarry Smith   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
1267072be85SIrina Sokolova   In general, for PDE problems in which most nonzeros are near the diagonal,
1272ef1f0ffSBarry Smith   one expects `d_nz` >> `o_nz`.
1287072be85SIrina Sokolova 
12942747ad1SJacob Faibussowitsch .seealso: [](ch_matrices), `Mat`, `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
1307072be85SIrina Sokolova @*/
MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)131d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
132d71ae5a4SJacob Faibussowitsch {
1337072be85SIrina Sokolova   PetscMPIInt size;
1347072be85SIrina Sokolova 
1357072be85SIrina Sokolova   PetscFunctionBegin;
1369566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
1379566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1389566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1397072be85SIrina Sokolova   if (size > 1) {
1409566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIBAIJMKL));
1419566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
1427072be85SIrina Sokolova   } else {
1439566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQBAIJMKL));
1449566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
1457072be85SIrina Sokolova   }
1463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1477072be85SIrina Sokolova }
1487072be85SIrina Sokolova 
MatCreate_MPIBAIJMKL(Mat A)149d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
150d71ae5a4SJacob Faibussowitsch {
1517072be85SIrina Sokolova   PetscFunctionBegin;
1529566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATMPIBAIJ));
1539566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
1543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1557072be85SIrina Sokolova }
1567072be85SIrina Sokolova 
1577072be85SIrina Sokolova /*MC
1587072be85SIrina Sokolova    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
1597072be85SIrina Sokolova 
16011a5261eSBarry Smith    This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
16111a5261eSBarry Smith    and `MATMPIBAIJMKL` otherwise.  As a result, for single process communicators,
16211a5261eSBarry Smith   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
1637072be85SIrina Sokolova   for communicators controlling multiple processes.  It is recommended that you call both of
1647072be85SIrina Sokolova   the above preallocation routines for simplicity.
1657072be85SIrina Sokolova 
1662ef1f0ffSBarry Smith    Options Database Key:
16711a5261eSBarry Smith . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
1687072be85SIrina Sokolova 
1697072be85SIrina Sokolova   Level: beginner
1707072be85SIrina Sokolova 
1711cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
1727072be85SIrina Sokolova M*/
173