xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision d083f849a86f1f43e18d534ee43954e2786cb29a)
17072be85SIrina Sokolova #include <../src/mat/impls/baij/mpi/mpibaij.h>
27072be85SIrina Sokolova 
37072be85SIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
47072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
57072be85SIrina Sokolova 
67072be85SIrina Sokolova PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
77072be85SIrina Sokolova {
87072be85SIrina Sokolova   Mat_MPIBAIJ     *b = (Mat_MPIBAIJ*)B->data;
97072be85SIrina Sokolova   PetscErrorCode ierr;
107072be85SIrina Sokolova 
117072be85SIrina Sokolova   PetscFunctionBegin;
127072be85SIrina Sokolova   ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
137072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
147072be85SIrina Sokolova   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
157072be85SIrina Sokolova   PetscFunctionReturn(0);
167072be85SIrina Sokolova }
177072be85SIrina Sokolova 
187072be85SIrina Sokolova PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
197072be85SIrina Sokolova {
207072be85SIrina Sokolova   PetscErrorCode ierr;
217072be85SIrina Sokolova   Mat            B = *newmat;
227072be85SIrina Sokolova 
237072be85SIrina Sokolova   PetscFunctionBegin;
247072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) {
257072be85SIrina Sokolova     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
267072be85SIrina Sokolova   }
277072be85SIrina Sokolova 
287072be85SIrina Sokolova   ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr);
297072be85SIrina Sokolova   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
307072be85SIrina Sokolova   *newmat = B;
317072be85SIrina Sokolova   PetscFunctionReturn(0);
327072be85SIrina Sokolova }
337072be85SIrina Sokolova #endif
347072be85SIrina Sokolova /*@C
357072be85SIrina Sokolova    MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
367072be85SIrina Sokolova    (block compressed row).
377072be85SIrina Sokolova    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
387072be85SIrina Sokolova    routines from Intel MKL whenever possible.
397072be85SIrina Sokolova    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
407072be85SIrina Sokolova    operations are currently supported.
417072be85SIrina Sokolova    If the installed version of MKL supports the "SpMV2" sparse
427072be85SIrina Sokolova    inspector-executor routines, then those are used by default.
437072be85SIrina Sokolova    Default PETSc kernels are used otherwise.
447072be85SIrina Sokolova    For good matrix assembly performance the user should preallocate the matrix
457072be85SIrina Sokolova    storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
467072be85SIrina Sokolova    By setting these parameters accurately, performance can be increased by more
477072be85SIrina Sokolova    than a factor of 50.
487072be85SIrina Sokolova 
49*d083f849SBarry Smith    Collective
507072be85SIrina Sokolova 
517072be85SIrina Sokolova    Input Parameters:
527072be85SIrina Sokolova +  comm - MPI communicator
537072be85SIrina Sokolova .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
547072be85SIrina Sokolova           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
557072be85SIrina Sokolova .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
567072be85SIrina Sokolova            This value should be the same as the local size used in creating the
577072be85SIrina Sokolova            y vector for the matrix-vector product y = Ax.
587072be85SIrina Sokolova .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
597072be85SIrina Sokolova            This value should be the same as the local size used in creating the
607072be85SIrina Sokolova            x vector for the matrix-vector product y = Ax.
617072be85SIrina Sokolova .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
627072be85SIrina Sokolova .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
637072be85SIrina Sokolova .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
647072be85SIrina Sokolova            submatrix  (same for all local rows)
657072be85SIrina Sokolova .  d_nnz - array containing the number of nonzero blocks in the various block rows
667072be85SIrina Sokolova            of the in diagonal portion of the local (possibly different for each block
677072be85SIrina Sokolova            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
687072be85SIrina Sokolova            and set it even if it is zero.
697072be85SIrina Sokolova .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
707072be85SIrina Sokolova            submatrix (same for all local rows).
717072be85SIrina Sokolova -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
727072be85SIrina Sokolova            off-diagonal portion of the local submatrix (possibly different for
737072be85SIrina Sokolova            each block row) or NULL.
747072be85SIrina Sokolova 
757072be85SIrina Sokolova    Output Parameter:
767072be85SIrina Sokolova .  A - the matrix
777072be85SIrina Sokolova 
787072be85SIrina Sokolova    Options Database Keys:
797072be85SIrina Sokolova +   -mat_block_size - size of the blocks to use
807072be85SIrina Sokolova -   -mat_use_hash_table <fact>
817072be85SIrina Sokolova 
827072be85SIrina Sokolova    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
837072be85SIrina Sokolova    MatXXXXSetPreallocation() paradgm instead of this routine directly.
847072be85SIrina Sokolova    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
857072be85SIrina Sokolova 
867072be85SIrina Sokolova    Notes:
877072be85SIrina Sokolova    If the *_nnz parameter is given then the *_nz parameter is ignored
887072be85SIrina Sokolova 
897072be85SIrina Sokolova    A nonzero block is any block that as 1 or more nonzeros in it
907072be85SIrina Sokolova 
917072be85SIrina Sokolova    The user MUST specify either the local or global matrix dimensions
927072be85SIrina Sokolova    (possibly both).
937072be85SIrina Sokolova 
947072be85SIrina Sokolova    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
957072be85SIrina Sokolova    than it must be used on all processors that share the object for that argument.
967072be85SIrina Sokolova 
977072be85SIrina Sokolova    Storage Information:
987072be85SIrina Sokolova    For a square global matrix we define each processor's diagonal portion
997072be85SIrina Sokolova    to be its local rows and the corresponding columns (a square submatrix);
1007072be85SIrina Sokolova    each processor's off-diagonal portion encompasses the remainder of the
1017072be85SIrina Sokolova    local matrix (a rectangular submatrix).
1027072be85SIrina Sokolova 
1037072be85SIrina Sokolova    The user can specify preallocated storage for the diagonal part of
1047072be85SIrina Sokolova    the local submatrix with either d_nz or d_nnz (not both).  Set
1057072be85SIrina Sokolova    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1067072be85SIrina Sokolova    memory allocation.  Likewise, specify preallocated storage for the
1077072be85SIrina Sokolova    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1087072be85SIrina Sokolova 
1097072be85SIrina Sokolova    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1107072be85SIrina Sokolova    the figure below we depict these three local rows and all columns (0-11).
1117072be85SIrina Sokolova 
1127072be85SIrina Sokolova .vb
1137072be85SIrina Sokolova            0 1 2 3 4 5 6 7 8 9 10 11
1147072be85SIrina Sokolova           --------------------------
1157072be85SIrina Sokolova    row 3  |o o o d d d o o o o  o  o
1167072be85SIrina Sokolova    row 4  |o o o d d d o o o o  o  o
1177072be85SIrina Sokolova    row 5  |o o o d d d o o o o  o  o
1187072be85SIrina Sokolova           --------------------------
1197072be85SIrina Sokolova .ve
1207072be85SIrina Sokolova 
1217072be85SIrina Sokolova    Thus, any entries in the d locations are stored in the d (diagonal)
1227072be85SIrina Sokolova    submatrix, and any entries in the o locations are stored in the
1237072be85SIrina Sokolova    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1247072be85SIrina Sokolova    stored simply in the MATSEQBAIJMKL format for compressed row storage.
1257072be85SIrina Sokolova 
1267072be85SIrina Sokolova    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1277072be85SIrina Sokolova    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1287072be85SIrina Sokolova    In general, for PDE problems in which most nonzeros are near the diagonal,
1297072be85SIrina Sokolova    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1307072be85SIrina Sokolova    or you will get TERRIBLE performance; see the users' manual chapter on
1317072be85SIrina Sokolova    matrices.
1327072be85SIrina Sokolova 
1337072be85SIrina Sokolova    Level: intermediate
1347072be85SIrina Sokolova 
1357072be85SIrina Sokolova .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
1367072be85SIrina Sokolova @*/
1377072be85SIrina Sokolova 
1387072be85SIrina Sokolova PetscErrorCode  MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1397072be85SIrina Sokolova {
1407072be85SIrina Sokolova   PetscErrorCode ierr;
1417072be85SIrina Sokolova   PetscMPIInt    size;
1427072be85SIrina Sokolova 
1437072be85SIrina Sokolova   PetscFunctionBegin;
1447072be85SIrina Sokolova   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1457072be85SIrina Sokolova   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1467072be85SIrina Sokolova   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1477072be85SIrina Sokolova   if (size > 1) {
148e1fdb2e4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1497072be85SIrina Sokolova     ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
1507072be85SIrina Sokolova #else
1517072be85SIrina Sokolova     ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
1527072be85SIrina Sokolova     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
1537072be85SIrina Sokolova #endif
1547072be85SIrina Sokolova     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1557072be85SIrina Sokolova   } else {
156e1fdb2e4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1577072be85SIrina Sokolova     ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
1587072be85SIrina Sokolova #else
1597072be85SIrina Sokolova     ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
1607072be85SIrina Sokolova     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1617072be85SIrina Sokolova #endif
1627072be85SIrina Sokolova     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1637072be85SIrina Sokolova   }
1647072be85SIrina Sokolova   PetscFunctionReturn(0);
1657072be85SIrina Sokolova }
1667072be85SIrina Sokolova 
1677072be85SIrina Sokolova PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
1687072be85SIrina Sokolova {
1697072be85SIrina Sokolova   PetscErrorCode ierr;
1707072be85SIrina Sokolova 
1717072be85SIrina Sokolova   PetscFunctionBegin;
1727072be85SIrina Sokolova   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
173e1fdb2e4SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1747072be85SIrina Sokolova   ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
1757072be85SIrina Sokolova #else
1767072be85SIrina Sokolova   ierr = PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
1777072be85SIrina Sokolova #endif
1787072be85SIrina Sokolova   PetscFunctionReturn(0);
1797072be85SIrina Sokolova }
1807072be85SIrina Sokolova 
1817072be85SIrina Sokolova /*MC
1827072be85SIrina Sokolova    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
1837072be85SIrina Sokolova 
1847072be85SIrina Sokolova    This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
1857072be85SIrina Sokolova    and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
1867072be85SIrina Sokolova   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
1877072be85SIrina Sokolova   for communicators controlling multiple processes.  It is recommended that you call both of
1887072be85SIrina Sokolova   the above preallocation routines for simplicity.
1897072be85SIrina Sokolova 
1907072be85SIrina Sokolova    Options Database Keys:
1917072be85SIrina Sokolova . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions()
1927072be85SIrina Sokolova 
1937072be85SIrina Sokolova   Level: beginner
1947072be85SIrina Sokolova 
195ddee360bSSatish Balay .seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
1967072be85SIrina Sokolova M*/
1977072be85SIrina Sokolova 
198