17072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h> 27072be85SIrina Sokolova 37072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 47072be85SIrina Sokolova 5*b9e7e5c1SBarry Smith static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 67072be85SIrina Sokolova { 77072be85SIrina Sokolova Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 87072be85SIrina Sokolova PetscErrorCode ierr; 97072be85SIrina Sokolova 107072be85SIrina Sokolova PetscFunctionBegin; 117072be85SIrina Sokolova ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 127072be85SIrina Sokolova ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr); 137072be85SIrina Sokolova ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr); 147072be85SIrina Sokolova PetscFunctionReturn(0); 157072be85SIrina Sokolova } 167072be85SIrina Sokolova 17*b9e7e5c1SBarry Smith static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat) 187072be85SIrina Sokolova { 197072be85SIrina Sokolova PetscErrorCode ierr; 207072be85SIrina Sokolova Mat B = *newmat; 217072be85SIrina Sokolova 227072be85SIrina Sokolova PetscFunctionBegin; 237072be85SIrina Sokolova if (reuse == MAT_INITIAL_MATRIX) { 247072be85SIrina Sokolova ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 257072be85SIrina Sokolova } 267072be85SIrina Sokolova 277072be85SIrina Sokolova ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr); 287072be85SIrina Sokolova ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr); 297072be85SIrina Sokolova *newmat = B; 307072be85SIrina Sokolova PetscFunctionReturn(0); 317072be85SIrina Sokolova } 32*b9e7e5c1SBarry Smith 337072be85SIrina Sokolova /*@C 347072be85SIrina Sokolova MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format 357072be85SIrina Sokolova (block compressed row). 367072be85SIrina Sokolova This type inherits from BAIJ and is largely identical, but uses sparse BLAS 377072be85SIrina Sokolova routines from Intel MKL whenever possible. 387072be85SIrina Sokolova MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 397072be85SIrina Sokolova operations are currently supported. 407072be85SIrina Sokolova If the installed version of MKL supports the "SpMV2" sparse 417072be85SIrina Sokolova inspector-executor routines, then those are used by default. 427072be85SIrina Sokolova Default PETSc kernels are used otherwise. 437072be85SIrina Sokolova For good matrix assembly performance the user should preallocate the matrix 447072be85SIrina Sokolova storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz). 457072be85SIrina Sokolova By setting these parameters accurately, performance can be increased by more 467072be85SIrina Sokolova than a factor of 50. 477072be85SIrina Sokolova 48d083f849SBarry Smith Collective 497072be85SIrina Sokolova 507072be85SIrina Sokolova Input Parameters: 517072be85SIrina Sokolova + comm - MPI communicator 527072be85SIrina Sokolova . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 537072be85SIrina Sokolova blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 547072be85SIrina Sokolova . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 557072be85SIrina Sokolova This value should be the same as the local size used in creating the 567072be85SIrina Sokolova y vector for the matrix-vector product y = Ax. 577072be85SIrina Sokolova . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 587072be85SIrina Sokolova This value should be the same as the local size used in creating the 597072be85SIrina Sokolova x vector for the matrix-vector product y = Ax. 607072be85SIrina Sokolova . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 617072be85SIrina Sokolova . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 627072be85SIrina Sokolova . d_nz - number of nonzero blocks per block row in diagonal portion of local 637072be85SIrina Sokolova submatrix (same for all local rows) 647072be85SIrina Sokolova . d_nnz - array containing the number of nonzero blocks in the various block rows 657072be85SIrina Sokolova of the in diagonal portion of the local (possibly different for each block 667072be85SIrina Sokolova row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 677072be85SIrina Sokolova and set it even if it is zero. 687072be85SIrina Sokolova . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 697072be85SIrina Sokolova submatrix (same for all local rows). 707072be85SIrina Sokolova - o_nnz - array containing the number of nonzero blocks in the various block rows of the 717072be85SIrina Sokolova off-diagonal portion of the local submatrix (possibly different for 727072be85SIrina Sokolova each block row) or NULL. 737072be85SIrina Sokolova 747072be85SIrina Sokolova Output Parameter: 757072be85SIrina Sokolova . A - the matrix 767072be85SIrina Sokolova 777072be85SIrina Sokolova Options Database Keys: 787072be85SIrina Sokolova + -mat_block_size - size of the blocks to use 797072be85SIrina Sokolova - -mat_use_hash_table <fact> 807072be85SIrina Sokolova 817072be85SIrina Sokolova It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 82f6f02116SRichard Tran Mills MatXXXXSetPreallocation() paradigm instead of this routine directly. 837072be85SIrina Sokolova [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 847072be85SIrina Sokolova 857072be85SIrina Sokolova Notes: 867072be85SIrina Sokolova If the *_nnz parameter is given then the *_nz parameter is ignored 877072be85SIrina Sokolova 887072be85SIrina Sokolova A nonzero block is any block that as 1 or more nonzeros in it 897072be85SIrina Sokolova 907072be85SIrina Sokolova The user MUST specify either the local or global matrix dimensions 917072be85SIrina Sokolova (possibly both). 927072be85SIrina Sokolova 937072be85SIrina Sokolova If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 947072be85SIrina Sokolova than it must be used on all processors that share the object for that argument. 957072be85SIrina Sokolova 967072be85SIrina Sokolova Storage Information: 977072be85SIrina Sokolova For a square global matrix we define each processor's diagonal portion 987072be85SIrina Sokolova to be its local rows and the corresponding columns (a square submatrix); 997072be85SIrina Sokolova each processor's off-diagonal portion encompasses the remainder of the 1007072be85SIrina Sokolova local matrix (a rectangular submatrix). 1017072be85SIrina Sokolova 1027072be85SIrina Sokolova The user can specify preallocated storage for the diagonal part of 1037072be85SIrina Sokolova the local submatrix with either d_nz or d_nnz (not both). Set 1047072be85SIrina Sokolova d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1057072be85SIrina Sokolova memory allocation. Likewise, specify preallocated storage for the 1067072be85SIrina Sokolova off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1077072be85SIrina Sokolova 1087072be85SIrina Sokolova Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1097072be85SIrina Sokolova the figure below we depict these three local rows and all columns (0-11). 1107072be85SIrina Sokolova 1117072be85SIrina Sokolova .vb 1127072be85SIrina Sokolova 0 1 2 3 4 5 6 7 8 9 10 11 1137072be85SIrina Sokolova -------------------------- 1147072be85SIrina Sokolova row 3 |o o o d d d o o o o o o 1157072be85SIrina Sokolova row 4 |o o o d d d o o o o o o 1167072be85SIrina Sokolova row 5 |o o o d d d o o o o o o 1177072be85SIrina Sokolova -------------------------- 1187072be85SIrina Sokolova .ve 1197072be85SIrina Sokolova 1207072be85SIrina Sokolova Thus, any entries in the d locations are stored in the d (diagonal) 1217072be85SIrina Sokolova submatrix, and any entries in the o locations are stored in the 1227072be85SIrina Sokolova o (off-diagonal) submatrix. Note that the d and the o submatrices are 1237072be85SIrina Sokolova stored simply in the MATSEQBAIJMKL format for compressed row storage. 1247072be85SIrina Sokolova 1257072be85SIrina Sokolova Now d_nz should indicate the number of block nonzeros per row in the d matrix, 1267072be85SIrina Sokolova and o_nz should indicate the number of block nonzeros per row in the o matrix. 1277072be85SIrina Sokolova In general, for PDE problems in which most nonzeros are near the diagonal, 1287072be85SIrina Sokolova one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1297072be85SIrina Sokolova or you will get TERRIBLE performance; see the users' manual chapter on 1307072be85SIrina Sokolova matrices. 1317072be85SIrina Sokolova 1327072be85SIrina Sokolova Level: intermediate 1337072be85SIrina Sokolova 1347072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 1357072be85SIrina Sokolova @*/ 1367072be85SIrina Sokolova 1377072be85SIrina Sokolova PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 1387072be85SIrina Sokolova { 1397072be85SIrina Sokolova PetscErrorCode ierr; 1407072be85SIrina Sokolova PetscMPIInt size; 1417072be85SIrina Sokolova 1427072be85SIrina Sokolova PetscFunctionBegin; 1437072be85SIrina Sokolova ierr = MatCreate(comm,A);CHKERRQ(ierr); 1447072be85SIrina Sokolova ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1457072be85SIrina Sokolova ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1467072be85SIrina Sokolova if (size > 1) { 1477072be85SIrina Sokolova ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr); 1487072be85SIrina Sokolova ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 1497072be85SIrina Sokolova } else { 1507072be85SIrina Sokolova ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr); 1517072be85SIrina Sokolova ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1527072be85SIrina Sokolova } 1537072be85SIrina Sokolova PetscFunctionReturn(0); 1547072be85SIrina Sokolova } 1557072be85SIrina Sokolova 1567072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A) 1577072be85SIrina Sokolova { 1587072be85SIrina Sokolova PetscErrorCode ierr; 1597072be85SIrina Sokolova 1607072be85SIrina Sokolova PetscFunctionBegin; 1617072be85SIrina Sokolova ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1627072be85SIrina Sokolova ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 1637072be85SIrina Sokolova PetscFunctionReturn(0); 1647072be85SIrina Sokolova } 1657072be85SIrina Sokolova 1667072be85SIrina Sokolova /*MC 1677072be85SIrina Sokolova MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices. 1687072be85SIrina Sokolova 1697072be85SIrina Sokolova This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator, 1707072be85SIrina Sokolova and MATMPIBAIJMKL otherwise. As a result, for single process communicators, 1717072be85SIrina Sokolova MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported 1727072be85SIrina Sokolova for communicators controlling multiple processes. It is recommended that you call both of 1737072be85SIrina Sokolova the above preallocation routines for simplicity. 1747072be85SIrina Sokolova 1757072be85SIrina Sokolova Options Database Keys: 1767072be85SIrina Sokolova . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions() 1777072be85SIrina Sokolova 1787072be85SIrina Sokolova Level: beginner 1797072be85SIrina Sokolova 180ddee360bSSatish Balay .seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL 1817072be85SIrina Sokolova M*/ 1827072be85SIrina Sokolova 183