xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
17072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h>
27072be85SIrina Sokolova 
37072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
47072be85SIrina Sokolova 
5*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
6*d71ae5a4SJacob Faibussowitsch {
77072be85SIrina Sokolova   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
87072be85SIrina Sokolova 
97072be85SIrina Sokolova   PetscFunctionBegin;
109566063dSJacob Faibussowitsch   PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
119566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
129566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
137072be85SIrina Sokolova   PetscFunctionReturn(0);
147072be85SIrina Sokolova }
157072be85SIrina Sokolova 
16*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
17*d71ae5a4SJacob Faibussowitsch {
187072be85SIrina Sokolova   Mat B = *newmat;
197072be85SIrina Sokolova 
207072be85SIrina Sokolova   PetscFunctionBegin;
2148a46eb9SPierre Jolivet   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
227072be85SIrina Sokolova 
239566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
257072be85SIrina Sokolova   *newmat = B;
267072be85SIrina Sokolova   PetscFunctionReturn(0);
277072be85SIrina Sokolova }
28b9e7e5c1SBarry Smith 
297072be85SIrina Sokolova /*@C
3011a5261eSBarry Smith    MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format
317072be85SIrina Sokolova    (block compressed row).
3211a5261eSBarry Smith    This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
337072be85SIrina Sokolova    routines from Intel MKL whenever possible.
3411a5261eSBarry Smith    `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
357072be85SIrina Sokolova    operations are currently supported.
367072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
377072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
387072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
397072be85SIrina Sokolova    For good matrix assembly performance the user should preallocate the matrix
407072be85SIrina Sokolova    storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
417072be85SIrina Sokolova    By setting these parameters accurately, performance can be increased by more
427072be85SIrina Sokolova    than a factor of 50.
437072be85SIrina Sokolova 
44d083f849SBarry Smith    Collective
457072be85SIrina Sokolova 
467072be85SIrina Sokolova    Input Parameters:
477072be85SIrina Sokolova +  comm - MPI communicator
4811a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
4911a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
5011a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
517072be85SIrina Sokolova            This value should be the same as the local size used in creating the
527072be85SIrina Sokolova            y vector for the matrix-vector product y = Ax.
5311a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
547072be85SIrina Sokolova            This value should be the same as the local size used in creating the
557072be85SIrina Sokolova            x vector for the matrix-vector product y = Ax.
5611a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
5711a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
587072be85SIrina Sokolova .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
597072be85SIrina Sokolova            submatrix  (same for all local rows)
607072be85SIrina Sokolova .  d_nnz - array containing the number of nonzero blocks in the various block rows
617072be85SIrina Sokolova            of the in diagonal portion of the local (possibly different for each block
627072be85SIrina Sokolova            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
637072be85SIrina Sokolova            and set it even if it is zero.
647072be85SIrina Sokolova .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
657072be85SIrina Sokolova            submatrix (same for all local rows).
667072be85SIrina Sokolova -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
677072be85SIrina Sokolova            off-diagonal portion of the local submatrix (possibly different for
687072be85SIrina Sokolova            each block row) or NULL.
697072be85SIrina Sokolova 
707072be85SIrina Sokolova    Output Parameter:
717072be85SIrina Sokolova .  A - the matrix
727072be85SIrina Sokolova 
737072be85SIrina Sokolova    Options Database Keys:
747072be85SIrina Sokolova +   -mat_block_size - size of the blocks to use
7567b8a455SSatish Balay -   -mat_use_hash_table <fact> - set hash table factor
767072be85SIrina Sokolova 
7711a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
78f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
7911a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
807072be85SIrina Sokolova 
817072be85SIrina Sokolova    Notes:
827072be85SIrina Sokolova    If the *_nnz parameter is given then the *_nz parameter is ignored
837072be85SIrina Sokolova 
847072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
857072be85SIrina Sokolova 
867072be85SIrina Sokolova    The user MUST specify either the local or global matrix dimensions
877072be85SIrina Sokolova    (possibly both).
887072be85SIrina Sokolova 
8911a5261eSBarry Smith    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
907072be85SIrina Sokolova    than it must be used on all processors that share the object for that argument.
917072be85SIrina Sokolova 
927072be85SIrina Sokolova    Storage Information:
937072be85SIrina Sokolova    For a square global matrix we define each processor's diagonal portion
947072be85SIrina Sokolova    to be its local rows and the corresponding columns (a square submatrix);
957072be85SIrina Sokolova    each processor's off-diagonal portion encompasses the remainder of the
967072be85SIrina Sokolova    local matrix (a rectangular submatrix).
977072be85SIrina Sokolova 
987072be85SIrina Sokolova    The user can specify preallocated storage for the diagonal part of
997072be85SIrina Sokolova    the local submatrix with either d_nz or d_nnz (not both).  Set
10011a5261eSBarry Smith    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
1017072be85SIrina Sokolova    memory allocation.  Likewise, specify preallocated storage for the
1027072be85SIrina Sokolova    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1037072be85SIrina Sokolova 
1047072be85SIrina Sokolova    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1057072be85SIrina Sokolova    the figure below we depict these three local rows and all columns (0-11).
1067072be85SIrina Sokolova 
1077072be85SIrina Sokolova .vb
1087072be85SIrina Sokolova            0 1 2 3 4 5 6 7 8 9 10 11
1097072be85SIrina Sokolova           --------------------------
1107072be85SIrina Sokolova    row 3  |o o o d d d o o o o  o  o
1117072be85SIrina Sokolova    row 4  |o o o d d d o o o o  o  o
1127072be85SIrina Sokolova    row 5  |o o o d d d o o o o  o  o
1137072be85SIrina Sokolova           --------------------------
1147072be85SIrina Sokolova .ve
1157072be85SIrina Sokolova 
1167072be85SIrina Sokolova    Thus, any entries in the d locations are stored in the d (diagonal)
1177072be85SIrina Sokolova    submatrix, and any entries in the o locations are stored in the
1187072be85SIrina Sokolova    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
11911a5261eSBarry Smith    stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
1207072be85SIrina Sokolova 
1217072be85SIrina Sokolova    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1227072be85SIrina Sokolova    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1237072be85SIrina Sokolova    In general, for PDE problems in which most nonzeros are near the diagonal,
1247072be85SIrina Sokolova    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1257072be85SIrina Sokolova    or you will get TERRIBLE performance; see the users' manual chapter on
1267072be85SIrina Sokolova    matrices.
1277072be85SIrina Sokolova 
1287072be85SIrina Sokolova    Level: intermediate
1297072be85SIrina Sokolova 
13011a5261eSBarry Smith .seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
1317072be85SIrina Sokolova @*/
1327072be85SIrina Sokolova 
133*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
134*d71ae5a4SJacob Faibussowitsch {
1357072be85SIrina Sokolova   PetscMPIInt size;
1367072be85SIrina Sokolova 
1377072be85SIrina Sokolova   PetscFunctionBegin;
1389566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
1399566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
1409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1417072be85SIrina Sokolova   if (size > 1) {
1429566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIBAIJMKL));
1439566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
1447072be85SIrina Sokolova   } else {
1459566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQBAIJMKL));
1469566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
1477072be85SIrina Sokolova   }
1487072be85SIrina Sokolova   PetscFunctionReturn(0);
1497072be85SIrina Sokolova }
1507072be85SIrina Sokolova 
151*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
152*d71ae5a4SJacob Faibussowitsch {
1537072be85SIrina Sokolova   PetscFunctionBegin;
1549566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATMPIBAIJ));
1559566063dSJacob Faibussowitsch   PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
1567072be85SIrina Sokolova   PetscFunctionReturn(0);
1577072be85SIrina Sokolova }
1587072be85SIrina Sokolova 
1597072be85SIrina Sokolova /*MC
1607072be85SIrina Sokolova    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
1617072be85SIrina Sokolova 
16211a5261eSBarry Smith    This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
16311a5261eSBarry Smith    and `MATMPIBAIJMKL` otherwise.  As a result, for single process communicators,
16411a5261eSBarry Smith   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
1657072be85SIrina Sokolova   for communicators controlling multiple processes.  It is recommended that you call both of
1667072be85SIrina Sokolova   the above preallocation routines for simplicity.
1677072be85SIrina Sokolova 
1687072be85SIrina Sokolova    Options Database Keys:
16911a5261eSBarry Smith . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
1707072be85SIrina Sokolova 
1717072be85SIrina Sokolova   Level: beginner
1727072be85SIrina Sokolova 
173db781477SPatrick Sanan .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
1747072be85SIrina Sokolova M*/
175