xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision 7d5fd1e4d9337468ad3f05b65b7facdcd2dfd2a4)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>
2 
3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
4 
5 static PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
6 {
7   Mat_MPIBAIJ     *b = (Mat_MPIBAIJ*)B->data;
8   PetscErrorCode ierr;
9 
10   PetscFunctionBegin;
11   ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
12   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->A);CHKERRQ(ierr);
13   ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&b->B);CHKERRQ(ierr);
14   PetscFunctionReturn(0);
15 }
16 
17 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
18 {
19   PetscErrorCode ierr;
20   Mat            B = *newmat;
21 
22   PetscFunctionBegin;
23   if (reuse == MAT_INITIAL_MATRIX) {
24     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
25   }
26 
27   ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr);
28   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
29   *newmat = B;
30   PetscFunctionReturn(0);
31 }
32 
33 /*@C
34    MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
35    (block compressed row).
36    This type inherits from BAIJ and is largely identical, but uses sparse BLAS
37    routines from Intel MKL whenever possible.
38    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
39    operations are currently supported.
40    If the installed version of MKL supports the "SpMV2" sparse
41    inspector-executor routines, then those are used by default.
42    Default PETSc kernels are used otherwise.
43    For good matrix assembly performance the user should preallocate the matrix
44    storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
45    By setting these parameters accurately, performance can be increased by more
46    than a factor of 50.
47 
48    Collective
49 
50    Input Parameters:
51 +  comm - MPI communicator
52 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
53           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
54 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
55            This value should be the same as the local size used in creating the
56            y vector for the matrix-vector product y = Ax.
57 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
58            This value should be the same as the local size used in creating the
59            x vector for the matrix-vector product y = Ax.
60 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
61 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
62 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
63            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each
           block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set it even if it is zero.
68 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
69            submatrix (same for all local rows).
70 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
71            off-diagonal portion of the local submatrix (possibly different for
72            each block row) or NULL.
73 
74    Output Parameter:
75 .  A - the matrix
76 
77    Options Database Keys:
78 +   -mat_block_size - size of the blocks to use
79 -   -mat_use_hash_table <fact>
80 
81    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
82    MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation]
84 
85    Notes:
86    If the *_nnz parameter is given then the *_nz parameter is ignored
87 
   A nonzero block is any block that has 1 or more nonzeros in it
89 
90    The user MUST specify either the local or global matrix dimensions
91    (possibly both).
92 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
95 
96    Storage Information:
97    For a square global matrix we define each processor's diagonal portion
98    to be its local rows and the corresponding columns (a square submatrix);
99    each processor's off-diagonal portion encompasses the remainder of the
100    local matrix (a rectangular submatrix).
101 
102    The user can specify preallocated storage for the diagonal part of
103    the local submatrix with either d_nz or d_nnz (not both).  Set
104    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
105    memory allocation.  Likewise, specify preallocated storage for the
106    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
107 
108    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
109    the figure below we depict these three local rows and all columns (0-11).
110 
111 .vb
112            0 1 2 3 4 5 6 7 8 9 10 11
113           --------------------------
114    row 3  |o o o d d d o o o o  o  o
115    row 4  |o o o d d d o o o o  o  o
116    row 5  |o o o d d d o o o o  o  o
117           --------------------------
118 .ve
119 
120    Thus, any entries in the d locations are stored in the d (diagonal)
121    submatrix, and any entries in the o locations are stored in the
122    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
123    stored simply in the MATSEQBAIJMKL format for compressed row storage.
124 
125    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
126    and o_nz should indicate the number of block nonzeros per row in the o matrix.
127    In general, for PDE problems in which most nonzeros are near the diagonal,
128    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
129    or you will get TERRIBLE performance; see the users' manual chapter on
130    matrices.
131 
132    Level: intermediate
133 
134 .seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
135 @*/
136 
137 PetscErrorCode  MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
138 {
139   PetscErrorCode ierr;
140   PetscMPIInt    size;
141 
142   PetscFunctionBegin;
143   ierr = MatCreate(comm,A);CHKERRQ(ierr);
144   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
145   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
146   if (size > 1) {
147     ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
148     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
149   } else {
150     ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
151     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
157 {
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
162   ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*MC
167    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
168 
169    This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
170    and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
171   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
172   for communicators controlling multiple processes.  It is recommended that you call both of
173   the above preallocation routines for simplicity.
174 
175    Options Database Keys:
176 . -mat_type baijmkl - sets the matrix type to "BAIJMKL" during a call to MatSetFromOptions()
177 
178   Level: beginner
179 
180 .seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
181 M*/
182