17072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h> 27072be85SIrina Sokolova 37072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 47072be85SIrina Sokolova 5d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 6d71ae5a4SJacob Faibussowitsch { 77072be85SIrina Sokolova Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 87072be85SIrina Sokolova 97072be85SIrina Sokolova PetscFunctionBegin; 109566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz)); 119566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A)); 129566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B)); 137072be85SIrina Sokolova PetscFunctionReturn(0); 147072be85SIrina Sokolova } 157072be85SIrina Sokolova 16d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 17d71ae5a4SJacob Faibussowitsch { 187072be85SIrina Sokolova Mat B = *newmat; 197072be85SIrina Sokolova 207072be85SIrina Sokolova PetscFunctionBegin; 2148a46eb9SPierre Jolivet if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 227072be85SIrina Sokolova 239566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL)); 249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL)); 257072be85SIrina Sokolova *newmat = B; 267072be85SIrina Sokolova PetscFunctionReturn(0); 277072be85SIrina Sokolova } 28b9e7e5c1SBarry Smith 297072be85SIrina Sokolova /*@C 30*67be906fSBarry Smith MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row). 317072be85SIrina Sokolova 32d083f849SBarry Smith Collective 337072be85SIrina Sokolova 347072be85SIrina Sokolova Input Parameters: 357072be85SIrina Sokolova + comm - MPI communicator 3611a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3711a5261eSBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3811a5261eSBarry Smith . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 397072be85SIrina Sokolova This value should be the same as the local size used in creating the 407072be85SIrina Sokolova y vector for the matrix-vector product y = Ax. 4111a5261eSBarry Smith . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 427072be85SIrina Sokolova This value should be the same as the local size used in creating the 437072be85SIrina Sokolova x vector for the matrix-vector product y = Ax. 4411a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4511a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 467072be85SIrina Sokolova . d_nz - number of nonzero blocks per block row in diagonal portion of local 477072be85SIrina Sokolova submatrix (same for all local rows) 487072be85SIrina Sokolova . d_nnz - array containing the number of nonzero blocks in the various block rows 497072be85SIrina Sokolova of the in diagonal portion of the local (possibly different for each block 507072be85SIrina Sokolova row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 517072be85SIrina Sokolova and set it even if it is zero. 527072be85SIrina Sokolova . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 537072be85SIrina Sokolova submatrix (same for all local rows). 547072be85SIrina Sokolova - o_nnz - array containing the number of nonzero blocks in the various block rows of the 557072be85SIrina Sokolova off-diagonal portion of the local submatrix (possibly different for 567072be85SIrina Sokolova each block row) or NULL. 577072be85SIrina Sokolova 587072be85SIrina Sokolova Output Parameter: 597072be85SIrina Sokolova . A - the matrix 607072be85SIrina Sokolova 617072be85SIrina Sokolova Options Database Keys: 627072be85SIrina Sokolova + -mat_block_size - size of the blocks to use 6367b8a455SSatish Balay - -mat_use_hash_table <fact> - set hash table factor 647072be85SIrina Sokolova 6511a5261eSBarry Smith It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 66f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 6711a5261eSBarry Smith [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 687072be85SIrina Sokolova 697072be85SIrina Sokolova Notes: 70*67be906fSBarry Smith This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS 71*67be906fSBarry Smith routines from Intel MKL whenever possible. 72*67be906fSBarry Smith `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 73*67be906fSBarry Smith operations are currently supported. 74*67be906fSBarry Smith If the installed version of MKL supports the "SpMV2" sparse 75*67be906fSBarry Smith inspector-executor routines, then those are used by default. 76*67be906fSBarry Smith Default PETSc kernels are used otherwise. 77*67be906fSBarry Smith For good matrix assembly performance the user should preallocate the matrix 78*67be906fSBarry Smith storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 79*67be906fSBarry Smith By setting these parameters accurately, performance can be increased by more 80*67be906fSBarry Smith than a factor of 50. 81*67be906fSBarry Smith 827072be85SIrina Sokolova If the *_nnz parameter is given then the *_nz parameter is ignored 837072be85SIrina Sokolova 847072be85SIrina Sokolova A nonzero block is any block that as 1 or more nonzeros in it 857072be85SIrina Sokolova 867072be85SIrina Sokolova The user MUST specify either the local or global matrix dimensions 877072be85SIrina Sokolova (possibly both). 887072be85SIrina Sokolova 8911a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 907072be85SIrina Sokolova than it must be used on all processors that share the object for that argument. 917072be85SIrina Sokolova 927072be85SIrina Sokolova Storage Information: 937072be85SIrina Sokolova For a square global matrix we define each processor's diagonal portion 947072be85SIrina Sokolova to be its local rows and the corresponding columns (a square submatrix); 957072be85SIrina Sokolova each processor's off-diagonal portion encompasses the remainder of the 967072be85SIrina Sokolova local matrix (a rectangular submatrix). 977072be85SIrina Sokolova 987072be85SIrina Sokolova The user can specify preallocated storage for the diagonal part of 99*67be906fSBarry Smith the local submatrix with either `d_nz` or `d_nnz` (not both). Set 100*67be906fSBarry Smith `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 1017072be85SIrina Sokolova memory allocation. Likewise, specify preallocated storage for the 102*67be906fSBarry Smith off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 1037072be85SIrina Sokolova 1047072be85SIrina Sokolova Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1057072be85SIrina Sokolova the figure below we depict these three local rows and all columns (0-11). 1067072be85SIrina Sokolova 1077072be85SIrina Sokolova .vb 1087072be85SIrina Sokolova 0 1 2 3 4 5 6 7 8 9 10 11 1097072be85SIrina Sokolova -------------------------- 1107072be85SIrina Sokolova row 3 |o o o d d d o o o o o o 1117072be85SIrina Sokolova row 4 |o o o d d d o o o o o o 1127072be85SIrina Sokolova row 5 |o o o d d d o o o o o o 1137072be85SIrina Sokolova -------------------------- 1147072be85SIrina Sokolova .ve 1157072be85SIrina Sokolova 1167072be85SIrina Sokolova Thus, any entries in the d locations are stored in the d (diagonal) 1177072be85SIrina Sokolova submatrix, and any entries in the o locations are stored in the 1187072be85SIrina Sokolova o (off-diagonal) submatrix. Note that the d and the o submatrices are 11911a5261eSBarry Smith stored simply in the `MATSEQBAIJMKL` format for compressed row storage. 1207072be85SIrina Sokolova 121*67be906fSBarry Smith Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 122*67be906fSBarry Smith and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 1237072be85SIrina Sokolova In general, for PDE problems in which most nonzeros are near the diagonal, 124*67be906fSBarry Smith one expects `d_nz` >> `o_nz`. For large problems you MUST preallocate memory 1257072be85SIrina Sokolova or you will get TERRIBLE performance; see the users' manual chapter on 1267072be85SIrina Sokolova matrices. 1277072be85SIrina Sokolova 1287072be85SIrina Sokolova Level: intermediate 1297072be85SIrina Sokolova 13011a5261eSBarry Smith .seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 1317072be85SIrina Sokolova @*/ 1327072be85SIrina Sokolova 133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 134d71ae5a4SJacob Faibussowitsch { 1357072be85SIrina Sokolova PetscMPIInt size; 1367072be85SIrina Sokolova 1377072be85SIrina Sokolova PetscFunctionBegin; 1389566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 1399566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 1409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1417072be85SIrina Sokolova if (size > 1) { 1429566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPIBAIJMKL)); 1439566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 1447072be85SIrina Sokolova } else { 1459566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 1469566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 1477072be85SIrina Sokolova } 1487072be85SIrina Sokolova PetscFunctionReturn(0); 1497072be85SIrina Sokolova } 1507072be85SIrina Sokolova 151d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) 152d71ae5a4SJacob Faibussowitsch { 1537072be85SIrina Sokolova PetscFunctionBegin; 1549566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPIBAIJ)); 1559566063dSJacob Faibussowitsch PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A)); 1567072be85SIrina Sokolova PetscFunctionReturn(0); 1577072be85SIrina Sokolova } 1587072be85SIrina Sokolova 1597072be85SIrina Sokolova /*MC 1607072be85SIrina Sokolova MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 1617072be85SIrina Sokolova 16211a5261eSBarry Smith This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator, 16311a5261eSBarry Smith and `MATMPIBAIJMKL` otherwise. As a result, for single process communicators, 16411a5261eSBarry Smith `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported 1657072be85SIrina Sokolova for communicators controlling multiple processes. It is recommended that you call both of 1667072be85SIrina Sokolova the above preallocation routines for simplicity. 1677072be85SIrina Sokolova 1687072be85SIrina Sokolova Options Database Keys: 16911a5261eSBarry Smith . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()` 1707072be85SIrina Sokolova 1717072be85SIrina Sokolova Level: beginner 1727072be85SIrina Sokolova 173db781477SPatrick Sanan .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL` 1747072be85SIrina Sokolova M*/ 175