1 #include <../src/mat/impls/baij/mpi/mpibaij.h> 2 3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 4 5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 6 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 7 8 PetscFunctionBegin; 9 PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz)); 10 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A)); 11 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B)); 12 PetscFunctionReturn(0); 13 } 14 15 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) { 16 Mat B = *newmat; 17 18 PetscFunctionBegin; 19 if (reuse == MAT_INITIAL_MATRIX) { PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); } 20 21 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL)); 22 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL)); 23 *newmat = B; 24 PetscFunctionReturn(0); 25 } 26 27 /*@C 28 MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format 29 (block compressed row). 30 This type inherits from BAIJ and is largely identical, but uses sparse BLAS 31 routines from Intel MKL whenever possible. 32 MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 33 operations are currently supported. 34 If the installed version of MKL supports the "SpMV2" sparse 35 inspector-executor routines, then those are used by default. 36 Default PETSc kernels are used otherwise. 37 For good matrix assembly performance the user should preallocate the matrix 38 storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz). 39 By setting these parameters accurately, performance can be increased by more 40 than a factor of 50. 41 42 Collective 43 44 Input Parameters: 45 + comm - MPI communicator 46 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 47 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 48 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 49 This value should be the same as the local size used in creating the 50 y vector for the matrix-vector product y = Ax. 51 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 52 This value should be the same as the local size used in creating the 53 x vector for the matrix-vector product y = Ax. 54 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 55 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 56 . d_nz - number of nonzero blocks per block row in diagonal portion of local 57 submatrix (same for all local rows) 58 . d_nnz - array containing the number of nonzero blocks in the various block rows 59 of the in diagonal portion of the local (possibly different for each block 60 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 61 and set it even if it is zero. 62 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 63 submatrix (same for all local rows). 64 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 65 off-diagonal portion of the local submatrix (possibly different for 66 each block row) or NULL. 67 68 Output Parameter: 69 . A - the matrix 70 71 Options Database Keys: 72 + -mat_block_size - size of the blocks to use 73 - -mat_use_hash_table <fact> - set hash table factor 74 75 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 76 MatXXXXSetPreallocation() paradigm instead of this routine directly. 77 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 78 79 Notes: 80 If the *_nnz parameter is given then the *_nz parameter is ignored 81 82 A nonzero block is any block that as 1 or more nonzeros in it 83 84 The user MUST specify either the local or global matrix dimensions 85 (possibly both). 86 87 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 88 than it must be used on all processors that share the object for that argument. 89 90 Storage Information: 91 For a square global matrix we define each processor's diagonal portion 92 to be its local rows and the corresponding columns (a square submatrix); 93 each processor's off-diagonal portion encompasses the remainder of the 94 local matrix (a rectangular submatrix). 95 96 The user can specify preallocated storage for the diagonal part of 97 the local submatrix with either d_nz or d_nnz (not both). Set 98 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 99 memory allocation. Likewise, specify preallocated storage for the 100 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 101 102 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 103 the figure below we depict these three local rows and all columns (0-11). 104 105 .vb 106 0 1 2 3 4 5 6 7 8 9 10 11 107 -------------------------- 108 row 3 |o o o d d d o o o o o o 109 row 4 |o o o d d d o o o o o o 110 row 5 |o o o d d d o o o o o o 111 -------------------------- 112 .ve 113 114 Thus, any entries in the d locations are stored in the d (diagonal) 115 submatrix, and any entries in the o locations are stored in the 116 o (off-diagonal) submatrix. Note that the d and the o submatrices are 117 stored simply in the MATSEQBAIJMKL format for compressed row storage. 118 119 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 120 and o_nz should indicate the number of block nonzeros per row in the o matrix. 121 In general, for PDE problems in which most nonzeros are near the diagonal, 122 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 123 or you will get TERRIBLE performance; see the users' manual chapter on 124 matrices. 125 126 Level: intermediate 127 128 .seealso: `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatCreateBAIJMKL()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 129 @*/ 130 131 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 132 PetscMPIInt size; 133 134 PetscFunctionBegin; 135 PetscCall(MatCreate(comm, A)); 136 PetscCall(MatSetSizes(*A, m, n, M, N)); 137 PetscCallMPI(MPI_Comm_size(comm, &size)); 138 if (size > 1) { 139 PetscCall(MatSetType(*A, MATMPIBAIJMKL)); 140 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 141 } else { 142 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 143 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 144 } 145 PetscFunctionReturn(0); 146 } 147 148 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) { 149 PetscFunctionBegin; 150 PetscCall(MatSetType(A, MATMPIBAIJ)); 151 PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A)); 152 PetscFunctionReturn(0); 153 } 154 155 /*MC 156 MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator, 159 and MATMPIBAIJMKL otherwise. As a result, for single process communicators, 160 MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported 161 for communicators controlling multiple processes. It is recommended that you call both of 162 the above preallocation routines for simplicity. 163 164 Options Database Keys: 165 . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions() 166 167 Level: beginner 168 169 .seealso: `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL` 170 M*/ 171