1 #include <../src/mat/impls/baij/mpi/mpibaij.h> 2 3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 4 5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 6 { 7 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 8 9 PetscFunctionBegin; 10 PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz)); 11 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A)); 12 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B)); 13 PetscFunctionReturn(0); 14 } 15 16 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 17 { 18 Mat B = *newmat; 19 20 PetscFunctionBegin; 21 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 22 23 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL)); 24 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL)); 25 *newmat = B; 26 PetscFunctionReturn(0); 27 } 28 29 /*@C 30 MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format 31 (block compressed row). 32 This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS 33 routines from Intel MKL whenever possible. 34 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 35 operations are currently supported. 36 If the installed version of MKL supports the "SpMV2" sparse 37 inspector-executor routines, then those are used by default. 38 Default PETSc kernels are used otherwise. 39 For good matrix assembly performance the user should preallocate the matrix 40 storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz). 41 By setting these parameters accurately, performance can be increased by more 42 than a factor of 50. 43 44 Collective 45 46 Input Parameters: 47 + comm - MPI communicator 48 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 49 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 50 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 51 This value should be the same as the local size used in creating the 52 y vector for the matrix-vector product y = Ax. 53 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 54 This value should be the same as the local size used in creating the 55 x vector for the matrix-vector product y = Ax. 56 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 57 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 58 . d_nz - number of nonzero blocks per block row in diagonal portion of local 59 submatrix (same for all local rows) 60 . d_nnz - array containing the number of nonzero blocks in the various block rows 61 of the in diagonal portion of the local (possibly different for each block 62 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 63 and set it even if it is zero. 64 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 65 submatrix (same for all local rows). 66 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 67 off-diagonal portion of the local submatrix (possibly different for 68 each block row) or NULL. 69 70 Output Parameter: 71 . A - the matrix 72 73 Options Database Keys: 74 + -mat_block_size - size of the blocks to use 75 - -mat_use_hash_table <fact> - set hash table factor 76 77 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 78 MatXXXXSetPreallocation() paradigm instead of this routine directly. 79 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 80 81 Notes: 82 If the *_nnz parameter is given then the *_nz parameter is ignored 83 84 A nonzero block is any block that as 1 or more nonzeros in it 85 86 The user MUST specify either the local or global matrix dimensions 87 (possibly both). 88 89 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 90 than it must be used on all processors that share the object for that argument. 91 92 Storage Information: 93 For a square global matrix we define each processor's diagonal portion 94 to be its local rows and the corresponding columns (a square submatrix); 95 each processor's off-diagonal portion encompasses the remainder of the 96 local matrix (a rectangular submatrix). 97 98 The user can specify preallocated storage for the diagonal part of 99 the local submatrix with either d_nz or d_nnz (not both). Set 100 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 101 memory allocation. Likewise, specify preallocated storage for the 102 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 103 104 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 105 the figure below we depict these three local rows and all columns (0-11). 106 107 .vb 108 0 1 2 3 4 5 6 7 8 9 10 11 109 -------------------------- 110 row 3 |o o o d d d o o o o o o 111 row 4 |o o o d d d o o o o o o 112 row 5 |o o o d d d o o o o o o 113 -------------------------- 114 .ve 115 116 Thus, any entries in the d locations are stored in the d (diagonal) 117 submatrix, and any entries in the o locations are stored in the 118 o (off-diagonal) submatrix. Note that the d and the o submatrices are 119 stored simply in the `MATSEQBAIJMKL` format for compressed row storage. 120 121 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 122 and o_nz should indicate the number of block nonzeros per row in the o matrix. 123 In general, for PDE problems in which most nonzeros are near the diagonal, 124 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 125 or you will get TERRIBLE performance; see the users' manual chapter on 126 matrices. 127 128 Level: intermediate 129 130 .seealso: `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 131 @*/ 132 133 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 134 { 135 PetscMPIInt size; 136 137 PetscFunctionBegin; 138 PetscCall(MatCreate(comm, A)); 139 PetscCall(MatSetSizes(*A, m, n, M, N)); 140 PetscCallMPI(MPI_Comm_size(comm, &size)); 141 if (size > 1) { 142 PetscCall(MatSetType(*A, MATMPIBAIJMKL)); 143 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 144 } else { 145 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 146 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 147 } 148 PetscFunctionReturn(0); 149 } 150 151 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) 152 { 153 PetscFunctionBegin; 154 PetscCall(MatSetType(A, MATMPIBAIJ)); 155 PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A)); 156 PetscFunctionReturn(0); 157 } 158 159 /*MC 160 MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 161 162 This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator, 163 and `MATMPIBAIJMKL` otherwise. As a result, for single process communicators, 164 `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported 165 for communicators controlling multiple processes. It is recommended that you call both of 166 the above preallocation routines for simplicity. 167 168 Options Database Keys: 169 . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()` 170 171 Level: beginner 172 173 .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL` 174 M*/ 175