1*7072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h> 2*7072be85SIrina Sokolova 3*7072be85SIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 4*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 5*7072be85SIrina Sokolova 6*7072be85SIrina Sokolova PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 7*7072be85SIrina Sokolova //(Mat B, PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 8*7072be85SIrina Sokolova { 9*7072be85SIrina Sokolova Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 10*7072be85SIrina Sokolova PetscErrorCode ierr; 11*7072be85SIrina Sokolova 12*7072be85SIrina Sokolova PetscFunctionBegin; 13*7072be85SIrina Sokolova ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 14*7072be85SIrina Sokolova ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A);CHKERRQ(ierr); 15*7072be85SIrina Sokolova ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B);CHKERRQ(ierr); 16*7072be85SIrina Sokolova PetscFunctionReturn(0); 17*7072be85SIrina Sokolova } 18*7072be85SIrina Sokolova 19*7072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 20*7072be85SIrina Sokolova { 21*7072be85SIrina Sokolova PetscErrorCode ierr; 22*7072be85SIrina Sokolova Mat B = *newmat; 23*7072be85SIrina Sokolova 24*7072be85SIrina Sokolova PetscFunctionBegin; 25*7072be85SIrina Sokolova if (reuse == MAT_INITIAL_MATRIX) { 26*7072be85SIrina Sokolova ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 27*7072be85SIrina Sokolova } 28*7072be85SIrina Sokolova 29*7072be85SIrina Sokolova ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr); 30*7072be85SIrina Sokolova ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr); 31*7072be85SIrina Sokolova *newmat = B; 32*7072be85SIrina Sokolova PetscFunctionReturn(0); 33*7072be85SIrina Sokolova } 34*7072be85SIrina Sokolova #endif 35*7072be85SIrina Sokolova /*@C 36*7072be85SIrina Sokolova MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format 37*7072be85SIrina Sokolova (block compressed row). 38*7072be85SIrina Sokolova This type inherits from BAIJ and is largely identical, but uses sparse BLAS 39*7072be85SIrina Sokolova routines from Intel MKL whenever possible. 40*7072be85SIrina Sokolova MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 41*7072be85SIrina Sokolova operations are currently supported. 42*7072be85SIrina Sokolova If the installed version of MKL supports the "SpMV2" sparse 43*7072be85SIrina Sokolova inspector-executor routines, then those are used by default. 44*7072be85SIrina Sokolova Default PETSc kernels are used otherwise. 45*7072be85SIrina Sokolova For good matrix assembly performance the user should preallocate the matrix 46*7072be85SIrina Sokolova storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz). 47*7072be85SIrina Sokolova By setting these parameters accurately, performance can be increased by more 48*7072be85SIrina Sokolova than a factor of 50. 49*7072be85SIrina Sokolova 50*7072be85SIrina Sokolova Collective on MPI_Comm 51*7072be85SIrina Sokolova 52*7072be85SIrina Sokolova Input Parameters: 53*7072be85SIrina Sokolova + comm - MPI communicator 54*7072be85SIrina Sokolova . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 55*7072be85SIrina Sokolova blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 56*7072be85SIrina Sokolova . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 57*7072be85SIrina Sokolova This value should be the same as the local size used in creating the 58*7072be85SIrina Sokolova y vector for the matrix-vector product y = Ax. 59*7072be85SIrina Sokolova . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 60*7072be85SIrina Sokolova This value should be the same as the local size used in creating the 61*7072be85SIrina Sokolova x vector for the matrix-vector product y = Ax. 62*7072be85SIrina Sokolova . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 63*7072be85SIrina Sokolova . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 64*7072be85SIrina Sokolova . d_nz - number of nonzero blocks per block row in diagonal portion of local 65*7072be85SIrina Sokolova submatrix (same for all local rows) 66*7072be85SIrina Sokolova . d_nnz - array containing the number of nonzero blocks in the various block rows 67*7072be85SIrina Sokolova of the in diagonal portion of the local (possibly different for each block 68*7072be85SIrina Sokolova row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 69*7072be85SIrina Sokolova and set it even if it is zero. 70*7072be85SIrina Sokolova . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 71*7072be85SIrina Sokolova submatrix (same for all local rows). 72*7072be85SIrina Sokolova - o_nnz - array containing the number of nonzero blocks in the various block rows of the 73*7072be85SIrina Sokolova off-diagonal portion of the local submatrix (possibly different for 74*7072be85SIrina Sokolova each block row) or NULL. 75*7072be85SIrina Sokolova 76*7072be85SIrina Sokolova Output Parameter: 77*7072be85SIrina Sokolova . A - the matrix 78*7072be85SIrina Sokolova 79*7072be85SIrina Sokolova Options Database Keys: 80*7072be85SIrina Sokolova + -mat_block_size - size of the blocks to use 81*7072be85SIrina Sokolova - -mat_use_hash_table <fact> 82*7072be85SIrina Sokolova 83*7072be85SIrina Sokolova It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 84*7072be85SIrina Sokolova MatXXXXSetPreallocation() paradgm instead of this routine directly. 85*7072be85SIrina Sokolova [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 86*7072be85SIrina Sokolova 87*7072be85SIrina Sokolova Notes: 88*7072be85SIrina Sokolova If the *_nnz parameter is given then the *_nz parameter is ignored 89*7072be85SIrina Sokolova 90*7072be85SIrina Sokolova A nonzero block is any block that as 1 or more nonzeros in it 91*7072be85SIrina Sokolova 92*7072be85SIrina Sokolova The user MUST specify either the local or global matrix dimensions 93*7072be85SIrina Sokolova (possibly both). 94*7072be85SIrina Sokolova 95*7072be85SIrina Sokolova If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 96*7072be85SIrina Sokolova than it must be used on all processors that share the object for that argument. 97*7072be85SIrina Sokolova 98*7072be85SIrina Sokolova Storage Information: 99*7072be85SIrina Sokolova For a square global matrix we define each processor's diagonal portion 100*7072be85SIrina Sokolova to be its local rows and the corresponding columns (a square submatrix); 101*7072be85SIrina Sokolova each processor's off-diagonal portion encompasses the remainder of the 102*7072be85SIrina Sokolova local matrix (a rectangular submatrix). 103*7072be85SIrina Sokolova 104*7072be85SIrina Sokolova The user can specify preallocated storage for the diagonal part of 105*7072be85SIrina Sokolova the local submatrix with either d_nz or d_nnz (not both). Set 106*7072be85SIrina Sokolova d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 107*7072be85SIrina Sokolova memory allocation. Likewise, specify preallocated storage for the 108*7072be85SIrina Sokolova off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 109*7072be85SIrina Sokolova 110*7072be85SIrina Sokolova Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 111*7072be85SIrina Sokolova the figure below we depict these three local rows and all columns (0-11). 112*7072be85SIrina Sokolova 113*7072be85SIrina Sokolova .vb 114*7072be85SIrina Sokolova 0 1 2 3 4 5 6 7 8 9 10 11 115*7072be85SIrina Sokolova -------------------------- 116*7072be85SIrina Sokolova row 3 |o o o d d d o o o o o o 117*7072be85SIrina Sokolova row 4 |o o o d d d o o o o o o 118*7072be85SIrina Sokolova row 5 |o o o d d d o o o o o o 119*7072be85SIrina Sokolova -------------------------- 120*7072be85SIrina Sokolova .ve 121*7072be85SIrina Sokolova 122*7072be85SIrina Sokolova Thus, any entries in the d locations are stored in the d (diagonal) 123*7072be85SIrina Sokolova submatrix, and any entries in the o locations are stored in the 124*7072be85SIrina Sokolova o (off-diagonal) submatrix. Note that the d and the o submatrices are 125*7072be85SIrina Sokolova stored simply in the MATSEQBAIJMKL format for compressed row storage. 126*7072be85SIrina Sokolova 127*7072be85SIrina Sokolova Now d_nz should indicate the number of block nonzeros per row in the d matrix, 128*7072be85SIrina Sokolova and o_nz should indicate the number of block nonzeros per row in the o matrix. 129*7072be85SIrina Sokolova In general, for PDE problems in which most nonzeros are near the diagonal, 130*7072be85SIrina Sokolova one expects d_nz >> o_nz. For large problems you MUST preallocate memory 131*7072be85SIrina Sokolova or you will get TERRIBLE performance; see the users' manual chapter on 132*7072be85SIrina Sokolova matrices. 133*7072be85SIrina Sokolova 134*7072be85SIrina Sokolova Level: intermediate 135*7072be85SIrina Sokolova 136*7072be85SIrina Sokolova .keywords: matrix, block, aij, compressed row, sparse, parallel 137*7072be85SIrina Sokolova 138*7072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 139*7072be85SIrina Sokolova @*/ 140*7072be85SIrina Sokolova 141*7072be85SIrina Sokolova PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 142*7072be85SIrina Sokolova { 143*7072be85SIrina Sokolova PetscErrorCode ierr; 144*7072be85SIrina Sokolova PetscMPIInt size; 145*7072be85SIrina Sokolova 146*7072be85SIrina Sokolova PetscFunctionBegin; 147*7072be85SIrina Sokolova ierr = MatCreate(comm,A);CHKERRQ(ierr); 148*7072be85SIrina Sokolova ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 149*7072be85SIrina Sokolova ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 150*7072be85SIrina Sokolova if (size > 1) { 151*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 152*7072be85SIrina Sokolova ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr); 153*7072be85SIrina Sokolova #else 154*7072be85SIrina Sokolova ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 155*7072be85SIrina Sokolova ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 156*7072be85SIrina Sokolova #endif 157*7072be85SIrina Sokolova ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 158*7072be85SIrina Sokolova } else { 159*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 160*7072be85SIrina Sokolova ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 161*7072be85SIrina Sokolova #else 162*7072be85SIrina Sokolova ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 163*7072be85SIrina Sokolova ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 164*7072be85SIrina Sokolova #endif 165*7072be85SIrina Sokolova ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 166*7072be85SIrina Sokolova } 167*7072be85SIrina Sokolova PetscFunctionReturn(0); 168*7072be85SIrina Sokolova } 169*7072be85SIrina Sokolova 170*7072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) 171*7072be85SIrina Sokolova { 172*7072be85SIrina Sokolova PetscErrorCode ierr; 173*7072be85SIrina Sokolova 174*7072be85SIrina Sokolova PetscFunctionBegin; 175*7072be85SIrina Sokolova ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 176*7072be85SIrina Sokolova #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE 177*7072be85SIrina Sokolova ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 178*7072be85SIrina Sokolova #else 179*7072be85SIrina Sokolova ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n"); 180*7072be85SIrina Sokolova #endif 181*7072be85SIrina Sokolova PetscFunctionReturn(0); 182*7072be85SIrina Sokolova } 183*7072be85SIrina Sokolova 184*7072be85SIrina Sokolova /*MC 185*7072be85SIrina Sokolova MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 186*7072be85SIrina Sokolova 187*7072be85SIrina Sokolova This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator, 188*7072be85SIrina Sokolova and MATMPIBAIJMKL otherwise. As a result, for single process communicators, 189*7072be85SIrina Sokolova MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported 190*7072be85SIrina Sokolova for communicators controlling multiple processes. It is recommended that you call both of 191*7072be85SIrina Sokolova the above preallocation routines for simplicity. 192*7072be85SIrina Sokolova 193*7072be85SIrina Sokolova Options Database Keys: 194*7072be85SIrina Sokolova . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions() 195*7072be85SIrina Sokolova 196*7072be85SIrina Sokolova Level: beginner 197*7072be85SIrina Sokolova 198*7072be85SIrina Sokolova .seealso: MatCreateMPIBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL 199*7072be85SIrina Sokolova M*/ 200*7072be85SIrina Sokolova 201