#include <../src/mat/impls/baij/mpi/mpibaij.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Perform the standard MPIBAIJ preallocation, then convert the local diagonal
     (b->A) and off-diagonal (b->B) blocks to the MKL-accelerated sequential type. */
  ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B = *newmat;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  /* Change the type name and compose the MKL-aware preallocation routine so that
     later calls to MatMPIBAIJSetPreallocation() dispatch to the MKL variant. */
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).
   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.
   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm  - MPI communicator
.  bs    - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs().
.  m     - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n     - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M     - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N     - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows)
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
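
   As an illustration (this example is an added sketch, not part of the original
   man page): for the processor depicted above, owning local rows 3-5 of a 12 x 12
   matrix with bs=1, each block row has 3 blocks in the diagonal portion; assuming
   at most 2 of the o locations per row actually hold nonzeros, one could create
   and preallocate the matrix with

.vb
   Mat A;
   ierr = MatCreateBAIJMKL(comm,1,3,3,12,12,3,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve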

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
#else
  ierr = PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL; using the default PETSc routines. Please use MKL version 11.3 or higher.\n");CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise. As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type baijmkl - sets the matrix type to "baijmkl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/
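
/*
   A minimal usage sketch for MATBAIJMKL (an added illustration; the variables
   comm, bs, M, N, d_nz, and o_nz are hypothetical placeholders). Following the
   recommendation in the man page above, both preallocation routines are called;
   the one that does not match the communicator size is simply ignored.

.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJMKL);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
*/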