#include <../src/mat/impls/baij/mpi/mpibaij.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);

/* Preallocate as for MPIBAIJ, then convert the local diagonal and off-diagonal blocks to SEQBAIJMKL */
PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation_MPIBAIJ(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Convert an MPIBAIJ matrix to MPIBAIJMKL: change the type name and install the MKL-aware preallocation routine */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B = *newmat;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  ierr = PetscObjectChangeTypeName((PetscObject) B, MATMPIBAIJMKL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJMKL);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
#endif
/*@C
   MatCreateBAIJMKL - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).
   This type inherits from BAIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd
   operations are currently supported.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   Default PETSc kernels are used otherwise.
   For good matrix assembly performance the user should preallocate the matrix
   storage by setting the parameters d_nz (or d_nnz) and o_nz (or o_nnz).
   By setting these parameters accurately, performance can be increased by more
   than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
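
   For example, a minimal sketch of that paradigm (comm, bs, m, n, M, N and the preallocation
   arguments are the same quantities described above):

.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATBAIJMKL);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);CHKERRQ(ierr);            /* takes effect on a single process */
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); /* takes effect on multiple processes */
.ve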

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJMKL format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
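
   As a concrete sketch for the figure above (assuming bs=3, so rows 3-5 form one block row and
   columns 0-11 form four block columns), the d portion contains one nonzero block and the o
   portion contains three, so one could preallocate that layout with

.vb
      ierr = MatMPIBAIJSetPreallocation(A,3,1,NULL,3,NULL);CHKERRQ(ierr);
.ve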

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJMKL(), MatSetValues(), MatCreateBAIJMKL(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode  MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATMPIBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL (11.3 or later is required); using default PETSc routines instead.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
    ierr = MatSetType(*A,MATSEQBAIJMKL);CHKERRQ(ierr);
#else
    ierr = PetscInfo(*A,"MKL BAIJ routines are not supported by the installed version of MKL (11.3 or later is required); using default PETSc routines instead.\n");CHKERRQ(ierr);
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
#endif
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Registered constructor for MATMPIBAIJMKL: build an MPIBAIJ matrix and convert it in place when MKL sparse support is available */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  ierr = MatConvert_MPIBAIJ_MPIBAIJMKL(A,MATMPIBAIJMKL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
#else
  ierr = PetscInfo(A,"MKL BAIJ routines are not supported by the installed version of MKL (11.3 or later is required); using default PETSc routines instead.\n");CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJMKL - MATBAIJMKL = "baijmkl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQBAIJMKL when constructed with a single process communicator,
   and MATMPIBAIJMKL otherwise.  As a result, for single process communicators,
   MatSeqBAIJSetPreallocation() is supported, and similarly MatMPIBAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
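
   For example, after A has been created and its type set to MATBAIJMKL, a minimal sketch is to
   simply call both (bs, d_nz, and o_nz below are placeholder block size and per-block-row counts;
   the call that does not match the communicator has no effect):

.vb
      ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve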

   Options Database Keys:
. -mat_type baijmkl - sets the matrix type to "baijmkl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJMKL(), MATSEQBAIJMKL, MATMPIBAIJMKL
M*/
