1 #include <../src/mat/impls/baij/mpi/mpibaij.h> 2 3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 4 5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 6 { 7 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 8 9 PetscFunctionBegin; 10 PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz)); 11 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A)); 12 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B)); 13 PetscFunctionReturn(PETSC_SUCCESS); 14 } 15 16 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 17 { 18 Mat B = *newmat; 19 20 PetscFunctionBegin; 21 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 22 23 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL)); 24 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL)); 25 *newmat = B; 26 PetscFunctionReturn(PETSC_SUCCESS); 27 } 28 29 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 30 /*@C 31 MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row). 32 33 Collective 34 35 Input Parameters: 36 + comm - MPI communicator 37 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 38 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 39 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 40 This value should be the same as the local size used in creating the 41 y vector for the matrix-vector product y = Ax. 42 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 43 This value should be the same as the local size used in creating the 44 x vector for the matrix-vector product y = Ax. 45 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 46 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 47 . d_nz - number of nonzero blocks per block row in diagonal portion of local 48 submatrix (same for all local rows) 49 . d_nnz - array containing the number of nonzero blocks in the various block rows 50 of the in diagonal portion of the local (possibly different for each block 51 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry 52 and set it even if it is zero. 53 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 54 submatrix (same for all local rows). 55 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 56 off-diagonal portion of the local submatrix (possibly different for 57 each block row) or `NULL`. 58 59 Output Parameter: 60 . A - the matrix 61 62 Options Database Keys: 63 + -mat_block_size - size of the blocks to use 64 - -mat_use_hash_table <fact> - set hash table factor 65 66 Level: intermediate 67 68 Notes: 69 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 70 MatXXXXSetPreallocation() paradigm instead of this routine directly. 71 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 72 73 This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS 74 routines from Intel MKL whenever possible. 75 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 76 operations are currently supported. 77 If the installed version of MKL supports the "SpMV2" sparse 78 inspector-executor routines, then those are used by default. 79 Default PETSc kernels are used otherwise. 80 For good matrix assembly performance the user should preallocate the matrix 81 storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 82 By setting these parameters accurately, performance can be increased by more 83 than a factor of 50. 84 85 If the *_nnz parameter is given then the *_nz parameter is ignored 86 87 A nonzero block is any block that as 1 or more nonzeros in it 88 89 The user MUST specify either the local or global matrix dimensions 90 (possibly both). 91 92 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 93 than it must be used on all processors that share the object for that argument. 94 95 Storage Information: 96 For a square global matrix we define each processor's diagonal portion 97 to be its local rows and the corresponding columns (a square submatrix); 98 each processor's off-diagonal portion encompasses the remainder of the 99 local matrix (a rectangular submatrix). 100 101 The user can specify preallocated storage for the diagonal part of 102 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 103 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 104 memory allocation. Likewise, specify preallocated storage for the 105 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 106 107 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 108 the figure below we depict these three local rows and all columns (0-11). 109 110 .vb 111 0 1 2 3 4 5 6 7 8 9 10 11 112 -------------------------- 113 row 3 |o o o d d d o o o o o o 114 row 4 |o o o d d d o o o o o o 115 row 5 |o o o d d d o o o o o o 116 -------------------------- 117 .ve 118 119 Thus, any entries in the d locations are stored in the d (diagonal) 120 submatrix, and any entries in the o locations are stored in the 121 o (off-diagonal) submatrix. Note that the d and the o submatrices are 122 stored simply in the `MATSEQBAIJMKL` format for compressed row storage. 123 124 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 125 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 126 In general, for PDE problems in which most nonzeros are near the diagonal, 127 one expects `d_nz` >> `o_nz`. 128 129 .seealso: [](ch_matrices), `Mat`, `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 130 @*/ 131 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 132 { 133 PetscMPIInt size; 134 135 PetscFunctionBegin; 136 PetscCall(MatCreate(comm, A)); 137 PetscCall(MatSetSizes(*A, m, n, M, N)); 138 PetscCallMPI(MPI_Comm_size(comm, &size)); 139 if (size > 1) { 140 PetscCall(MatSetType(*A, MATMPIBAIJMKL)); 141 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 142 } else { 143 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 144 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 145 } 146 PetscFunctionReturn(PETSC_SUCCESS); 147 } 148 149 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) 150 { 151 PetscFunctionBegin; 152 PetscCall(MatSetType(A, MATMPIBAIJ)); 153 PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A)); 154 PetscFunctionReturn(PETSC_SUCCESS); 155 } 156 157 /*MC 158 MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 159 160 This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator, 161 and `MATMPIBAIJMKL` otherwise. As a result, for single process communicators, 162 `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported 163 for communicators controlling multiple processes. It is recommended that you call both of 164 the above preallocation routines for simplicity. 165 166 Options Database Key: 167 . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()` 168 169 Level: beginner 170 171 .seealso: [](ch_matrices), `Mat`, `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL` 172 M*/ 173