xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 30bab8dbce35fbb76248bd0fea1b2bc98f8a4f04)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12   #if defined(PETSC_USE_REAL_SINGLE)
13     #include <cmumps_c.h>
14   #else
15     #include <zmumps_c.h>
16   #endif
17 #else
18   #if defined(PETSC_USE_REAL_SINGLE)
19     #include <smumps_c.h>
20   #else
21     #include <dmumps_c.h>
22   #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34   #if defined(PETSC_USE_REAL_SINGLE)
35     #define MUMPS_c cmumps_c
36   #else
37     #define MUMPS_c zmumps_c
38   #endif
39 #else
40   #if defined(PETSC_USE_REAL_SINGLE)
41     #define MUMPS_c smumps_c
42   #else
43     #define MUMPS_c dmumps_c
44   #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
48    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention in PetscMPIInt, PetscBLASInt etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers if MUMPS is built in full 64-bit mode, therefore this macro is the more reliable check */
55     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80 {
81   PetscInt  myval;
82   PetscBool myset;
83   PetscFunctionBegin;
84   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87   if (set) *set = myset;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
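/* A minimal usage sketch of the wrapper above (hypothetical option name and description strings, not part of this file);
   it assumes it runs inside a PetscOptionsBegin()/PetscOptionsEnd() block where PetscOptionsObject is in scope:

     PetscMUMPSInt icntl;
     PetscBool     flg;

     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase of estimated working space", "None", mumps->id.ICNTL(14), &icntl, &flg));
     if (flg) mumps->id.ICNTL(14) = icntl;
*/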
91 
92 /* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94   #define PetscMUMPS_c(mumps) \
95     do { \
96       if (mumps->use_petsc_omp_support) { \
97         if (mumps->is_omp_master) { \
98           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
100           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
101           PetscCall(PetscFPTrapPop()); \
102           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
103         } \
104         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
105         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
106          to each process, so we only Bcast info[1], an error code, and leave the others (since they do not \
107          have an easy translation between omp_comm and petsc_comm). See the MUMPS 5.1.2 manual, p. 82. \
108          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
109       */ \
110         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
111         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
112         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
113         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
114       } else { \
115         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
116         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
117         PetscCall(PetscFPTrapPop()); \
118       } \
119     } while (0)
120 #else
121   #define PetscMUMPS_c(mumps) \
122     do { \
123       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
124       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
125       PetscCall(PetscFPTrapPop()); \
126     } while (0)
127 #endif
128 
129 /* declare MumpsScalar */
130 #if defined(PETSC_USE_COMPLEX)
131   #if defined(PETSC_USE_REAL_SINGLE)
132     #define MumpsScalar mumps_complex
133   #else
134     #define MumpsScalar mumps_double_complex
135   #endif
136 #else
137   #define MumpsScalar PetscScalar
138 #endif
139 
140 /* macros s.t. indices match MUMPS documentation */
141 #define ICNTL(I)  icntl[(I)-1]
142 #define CNTL(I)   cntl[(I)-1]
143 #define INFOG(I)  infog[(I)-1]
144 #define INFO(I)   info[(I)-1]
145 #define RINFOG(I) rinfog[(I)-1]
146 #define RINFO(I)  rinfo[(I)-1]
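/* For example, ICNTL(7) expands to icntl[6], so a sketch such as
     mumps->id.ICNTL(7) = 2;
   writes to the 0-based entry icntl[6] while reading like the 1-based ICNTL(7) of the MUMPS documentation. */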
147 
148 typedef struct Mat_MUMPS Mat_MUMPS;
149 struct Mat_MUMPS {
150 #if defined(PETSC_USE_COMPLEX)
151   #if defined(PETSC_USE_REAL_SINGLE)
152   CMUMPS_STRUC_C id;
153   #else
154   ZMUMPS_STRUC_C id;
155   #endif
156 #else
157   #if defined(PETSC_USE_REAL_SINGLE)
158   SMUMPS_STRUC_C id;
159   #else
160   DMUMPS_STRUC_C id;
161   #endif
162 #endif
163 
164   MatStructure   matstruc;
165   PetscMPIInt    myid, petsc_size;
166   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
167   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer; hence val_alloc. */
168   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
169   PetscMUMPSInt  sym;
170   MPI_Comm       mumps_comm;
171   PetscMUMPSInt *ICNTL_pre;
172   PetscReal     *CNTL_pre;
173   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
174   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
175   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
176   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
177 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
178   PetscInt    *rhs_nrow, max_nrhs;
179   PetscMPIInt *rhs_recvcounts, *rhs_disps;
180   PetscScalar *rhs_loc, *rhs_recvbuf;
181 #endif
182   Vec            b_seq, x_seq;
183   PetscInt       ninfo, *info; /* which INFO to display */
184   PetscInt       sizeredrhs;
185   PetscScalar   *schur_sol;
186   PetscInt       schur_sizesol;
187   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
188   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
189   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
190 
191   /* stuff used by petsc/mumps OpenMP support*/
192   PetscBool    use_petsc_omp_support;
193   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
194   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
195   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
196   PetscMPIInt  tag, omp_comm_size;
197   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
198   MPI_Request *reqs;
199 };
200 
201 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
202    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
203  */
204 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
205 {
206   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
207 
208   PetscFunctionBegin;
209 #if defined(PETSC_USE_64BIT_INDICES)
210   {
211     PetscInt i;
212     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
213       PetscCall(PetscFree(mumps->ia_alloc));
214       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
215       mumps->cur_ilen = nrow + 1;
216     }
217     if (nnz > mumps->cur_jlen) {
218       PetscCall(PetscFree(mumps->ja_alloc));
219       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
220       mumps->cur_jlen = nnz;
221     }
222     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
223     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
224     *ia_mumps = mumps->ia_alloc;
225     *ja_mumps = mumps->ja_alloc;
226   }
227 #else
228   *ia_mumps          = ia;
229   *ja_mumps          = ja;
230 #endif
231   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
232   PetscFunctionReturn(PETSC_SUCCESS);
233 }
234 
235 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
236 {
237   PetscFunctionBegin;
238   PetscCall(PetscFree(mumps->id.listvar_schur));
239   PetscCall(PetscFree(mumps->id.redrhs));
240   PetscCall(PetscFree(mumps->schur_sol));
241   mumps->id.size_schur = 0;
242   mumps->id.schur_lld  = 0;
243   mumps->id.ICNTL(19)  = 0;
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
247 /* solve with rhs in mumps->id.redrhs and return in the same location */
248 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
249 {
250   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
251   Mat                  S, B, X;
252   MatFactorSchurStatus schurstatus;
253   PetscInt             sizesol;
254 
255   PetscFunctionBegin;
256   PetscCall(MatFactorFactorizeSchurComplement(F));
257   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
258   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
259   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261   PetscCall(MatBindToCPU(B, S->boundtocpu));
262 #endif
263   switch (schurstatus) {
264   case MAT_FACTOR_SCHUR_FACTORED:
265     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
266     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
268     PetscCall(MatBindToCPU(X, S->boundtocpu));
269 #endif
270     if (!mumps->id.ICNTL(9)) { /* transpose solve */
271       PetscCall(MatMatSolveTranspose(S, B, X));
272     } else {
273       PetscCall(MatMatSolve(S, B, X));
274     }
275     break;
276   case MAT_FACTOR_SCHUR_INVERTED:
277     sizesol = mumps->id.nrhs * mumps->id.size_schur;
278     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
279       PetscCall(PetscFree(mumps->schur_sol));
280       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
281       mumps->schur_sizesol = sizesol;
282     }
283     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
284     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
285 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
286     PetscCall(MatBindToCPU(X, S->boundtocpu));
287 #endif
288     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
289     if (!mumps->id.ICNTL(9)) { /* transpose solve */
290       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
291     } else {
292       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
293     }
294     PetscCall(MatProductSetFromOptions(X));
295     PetscCall(MatProductSymbolic(X));
296     PetscCall(MatProductNumeric(X));
297 
298     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
299     break;
300   default:
301     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
302   }
303   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
304   PetscCall(MatDestroy(&B));
305   PetscCall(MatDestroy(&X));
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
310 {
311   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
312 
313   PetscFunctionBegin;
314   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
315     PetscFunctionReturn(PETSC_SUCCESS);
316   }
317   if (!expansion) { /* prepare for the condensation step */
318     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
319     /* allocate MUMPS internal array to store reduced right-hand sides */
320     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
321       PetscCall(PetscFree(mumps->id.redrhs));
322       mumps->id.lredrhs = mumps->id.size_schur;
323       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
324       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
325     }
326     mumps->id.ICNTL(26) = 1; /* condensation phase */
327   } else {                   /* prepare for the expansion step */
328     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
329     PetscCall(MatMumpsSolveSchur_Private(F));
330     mumps->id.ICNTL(26) = 2; /* expansion phase */
331     PetscMUMPS_c(mumps);
332     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
333     /* restore defaults */
334     mumps->id.ICNTL(26) = -1;
335     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
336     if (mumps->id.nrhs > 1) {
337       PetscCall(PetscFree(mumps->id.redrhs));
338       mumps->id.lredrhs = 0;
339       mumps->sizeredrhs = 0;
340     }
341   }
342   PetscFunctionReturn(PETSC_SUCCESS);
343 }
344 
345 /*
346   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
347 
348   input:
349     A       - matrix in aij, baij, or sbaij format
350     shift   - 0: C style output triple; 1: Fortran style output triple.
351     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
352               MAT_REUSE_MATRIX:   only the values in v array are updated
353   output:
354     nnz     - dim of r, c, and v (number of local nonzero entries of A)
355     r, c, v - row and col index, matrix values (matrix triples)
356 
357   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
358   freed with PetscFree2(mumps->irn, mumps->jcn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
359   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
360 
361  */
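/* A small illustration (hypothetical values, not part of the code path): with shift = 1 (Fortran-style triples),
   a 2x2 seqaij matrix storing the three entries a(0,0)=2.0, a(0,1)=3.0, a(1,1)=5.0 is converted to

     nnz = 3,  r = {1, 1, 2},  c = {1, 2, 2},  v = {2.0, 3.0, 5.0}

   i.e. one 1-based (row, col, value) entry per stored nonzero, as MUMPS expects. */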
362 
363 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
364 {
365   const PetscScalar *av;
366   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
367   PetscInt64         nz, rnz, i, j, k;
368   PetscMUMPSInt     *row, *col;
369   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
370 
371   PetscFunctionBegin;
372   PetscCall(MatSeqAIJGetArrayRead(A, &av));
373   mumps->val = (PetscScalar *)av;
374   if (reuse == MAT_INITIAL_MATRIX) {
375     nz = aa->nz;
376     ai = aa->i;
377     aj = aa->j;
378     PetscCall(PetscMalloc2(nz, &row, nz, &col));
379     for (i = k = 0; i < M; i++) {
380       rnz = ai[i + 1] - ai[i];
381       ajj = aj + ai[i];
382       for (j = 0; j < rnz; j++) {
383         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
384         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
385         k++;
386       }
387     }
388     mumps->irn = row;
389     mumps->jcn = col;
390     mumps->nnz = nz;
391   }
392   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
397 {
398   PetscInt64     nz, i, j, k, r;
399   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
400   PetscMUMPSInt *row, *col;
401 
402   PetscFunctionBegin;
403   mumps->val = a->val;
404   if (reuse == MAT_INITIAL_MATRIX) {
405     nz = a->sliidx[a->totalslices];
406     PetscCall(PetscMalloc2(nz, &row, nz, &col));
407     for (i = k = 0; i < a->totalslices; i++) {
408       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
409     }
410     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
411     mumps->irn = row;
412     mumps->jcn = col;
413     mumps->nnz = nz;
414   }
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
417 
418 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
419 {
420   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
421   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
422   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
423   PetscInt        bs;
424   PetscMUMPSInt  *row, *col;
425 
426   PetscFunctionBegin;
427   PetscCall(MatGetBlockSize(A, &bs));
428   M          = A->rmap->N / bs;
429   mumps->val = aa->a;
430   if (reuse == MAT_INITIAL_MATRIX) {
431     ai = aa->i;
432     aj = aa->j;
433     nz = bs2 * aa->nz;
434     PetscCall(PetscMalloc2(nz, &row, nz, &col));
435     for (i = 0; i < M; i++) {
436       ajj = aj + ai[i];
437       rnz = ai[i + 1] - ai[i];
438       for (k = 0; k < rnz; k++) {
439         for (j = 0; j < bs; j++) {
440           for (m = 0; m < bs; m++) {
441             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
442             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
443             idx++;
444           }
445         }
446       }
447     }
448     mumps->irn = row;
449     mumps->jcn = col;
450     mumps->nnz = nz;
451   }
452   PetscFunctionReturn(PETSC_SUCCESS);
453 }
454 
455 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
456 {
457   const PetscInt *ai, *aj, *ajj;
458   PetscInt        bs;
459   PetscInt64      nz, rnz, i, j, k, m;
460   PetscMUMPSInt  *row, *col;
461   PetscScalar    *val;
462   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
463   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
464 #if defined(PETSC_USE_COMPLEX)
465   PetscBool isset, hermitian;
466 #endif
467 
468   PetscFunctionBegin;
469 #if defined(PETSC_USE_COMPLEX)
470   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
471   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
472 #endif
473   ai = aa->i;
474   aj = aa->j;
475   PetscCall(MatGetBlockSize(A, &bs));
476   if (reuse == MAT_INITIAL_MATRIX) {
477     const PetscInt64 alloc_size = aa->nz * bs2;
478 
479     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
480     if (bs > 1) {
481       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
482       mumps->val = mumps->val_alloc;
483     } else {
484       mumps->val = aa->a;
485     }
486     mumps->irn = row;
487     mumps->jcn = col;
488   } else {
489     if (bs == 1) mumps->val = aa->a;
490     row = mumps->irn;
491     col = mumps->jcn;
492   }
493   val = mumps->val;
494 
495   nz = 0;
496   if (bs > 1) {
497     for (i = 0; i < mbs; i++) {
498       rnz = ai[i + 1] - ai[i];
499       ajj = aj + ai[i];
500       for (j = 0; j < rnz; j++) {
501         for (k = 0; k < bs; k++) {
502           for (m = 0; m < bs; m++) {
503             if (ajj[j] > i || k >= m) {
504               if (reuse == MAT_INITIAL_MATRIX) {
505                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
506                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
507               }
508               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
509             }
510           }
511         }
512       }
513     }
514   } else if (reuse == MAT_INITIAL_MATRIX) {
515     for (i = 0; i < mbs; i++) {
516       rnz = ai[i + 1] - ai[i];
517       ajj = aj + ai[i];
518       for (j = 0; j < rnz; j++) {
519         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
520         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
521         nz++;
522       }
523     }
524     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
525   }
526   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
531 {
532   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
533   PetscInt64         nz, rnz, i, j;
534   const PetscScalar *av, *v1;
535   PetscScalar       *val;
536   PetscMUMPSInt     *row, *col;
537   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
538   PetscBool          missing;
539 #if defined(PETSC_USE_COMPLEX)
540   PetscBool hermitian, isset;
541 #endif
542 
543   PetscFunctionBegin;
544 #if defined(PETSC_USE_COMPLEX)
545   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
546   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
547 #endif
548   PetscCall(MatSeqAIJGetArrayRead(A, &av));
549   ai    = aa->i;
550   aj    = aa->j;
551   adiag = aa->diag;
552   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
553   if (reuse == MAT_INITIAL_MATRIX) {
554     /* count nz in the upper triangular part of A */
555     nz = 0;
556     if (missing) {
557       for (i = 0; i < M; i++) {
558         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
559           for (j = ai[i]; j < ai[i + 1]; j++) {
560             if (aj[j] < i) continue;
561             nz++;
562           }
563         } else {
564           nz += ai[i + 1] - adiag[i];
565         }
566       }
567     } else {
568       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
569     }
570     PetscCall(PetscMalloc2(nz, &row, nz, &col));
571     PetscCall(PetscMalloc1(nz, &val));
572     mumps->nnz = nz;
573     mumps->irn = row;
574     mumps->jcn = col;
575     mumps->val = mumps->val_alloc = val;
576 
577     nz = 0;
578     if (missing) {
579       for (i = 0; i < M; i++) {
580         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
581           for (j = ai[i]; j < ai[i + 1]; j++) {
582             if (aj[j] < i) continue;
583             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
584             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
585             val[nz] = av[j];
586             nz++;
587           }
588         } else {
589           rnz = ai[i + 1] - adiag[i];
590           ajj = aj + adiag[i];
591           v1  = av + adiag[i];
592           for (j = 0; j < rnz; j++) {
593             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
594             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
595             val[nz++] = v1[j];
596           }
597         }
598       }
599     } else {
600       for (i = 0; i < M; i++) {
601         rnz = ai[i + 1] - adiag[i];
602         ajj = aj + adiag[i];
603         v1  = av + adiag[i];
604         for (j = 0; j < rnz; j++) {
605           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
606           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
607           val[nz++] = v1[j];
608         }
609       }
610     }
611   } else {
612     nz  = 0;
613     val = mumps->val;
614     if (missing) {
615       for (i = 0; i < M; i++) {
616         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
617           for (j = ai[i]; j < ai[i + 1]; j++) {
618             if (aj[j] < i) continue;
619             val[nz++] = av[j];
620           }
621         } else {
622           rnz = ai[i + 1] - adiag[i];
623           v1  = av + adiag[i];
624           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
625         }
626       }
627     } else {
628       for (i = 0; i < M; i++) {
629         rnz = ai[i + 1] - adiag[i];
630         v1  = av + adiag[i];
631         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
632       }
633     }
634   }
635   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
636   PetscFunctionReturn(PETSC_SUCCESS);
637 }
638 
639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
640 {
641   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
642   PetscInt           bs;
643   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
644   PetscMUMPSInt     *row, *col;
645   const PetscScalar *av, *bv, *v1, *v2;
646   PetscScalar       *val;
647   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
648   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
649   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
650   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
651 #if defined(PETSC_USE_COMPLEX)
652   PetscBool hermitian, isset;
653 #endif
654 
655   PetscFunctionBegin;
656 #if defined(PETSC_USE_COMPLEX)
657   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
658   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
659 #endif
660   PetscCall(MatGetBlockSize(A, &bs));
661   rstart = A->rmap->rstart;
662   ai     = aa->i;
663   aj     = aa->j;
664   bi     = bb->i;
665   bj     = bb->j;
666   av     = aa->a;
667   bv     = bb->a;
668 
669   garray = mat->garray;
670 
671   if (reuse == MAT_INITIAL_MATRIX) {
672     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
673     PetscCall(PetscMalloc2(nz, &row, nz, &col));
674     PetscCall(PetscMalloc1(nz, &val));
675     /* cannot determine the exact mumps->nnz now because of the SBAIJ format */
676     mumps->irn = row;
677     mumps->jcn = col;
678     mumps->val = mumps->val_alloc = val;
679   } else {
680     val = mumps->val;
681   }
682 
683   jj   = 0;
684   irow = rstart;
685   for (i = 0; i < mbs; i++) {
686     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
687     countA = ai[i + 1] - ai[i];
688     countB = bi[i + 1] - bi[i];
689     bjj    = bj + bi[i];
690     v1     = av + ai[i] * bs2;
691     v2     = bv + bi[i] * bs2;
692 
693     if (bs > 1) {
694       /* A-part */
695       for (j = 0; j < countA; j++) {
696         for (k = 0; k < bs; k++) {
697           for (m = 0; m < bs; m++) {
698             if (rstart + ajj[j] * bs > irow || k >= m) {
699               if (reuse == MAT_INITIAL_MATRIX) {
700                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
701                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
702               }
703               val[jj++] = v1[j * bs2 + m + k * bs];
704             }
705           }
706         }
707       }
708 
709       /* B-part */
710       for (j = 0; j < countB; j++) {
711         for (k = 0; k < bs; k++) {
712           for (m = 0; m < bs; m++) {
713             if (reuse == MAT_INITIAL_MATRIX) {
714               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
715               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
716             }
717             val[jj++] = v2[j * bs2 + m + k * bs];
718           }
719         }
720       }
721     } else {
722       /* A-part */
723       for (j = 0; j < countA; j++) {
724         if (reuse == MAT_INITIAL_MATRIX) {
725           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
726           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
727         }
728         val[jj++] = v1[j];
729       }
730 
731       /* B-part */
732       for (j = 0; j < countB; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
735           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
736         }
737         val[jj++] = v2[j];
738       }
739     }
740     irow += bs;
741   }
742   mumps->nnz = jj;
743   PetscFunctionReturn(PETSC_SUCCESS);
744 }
745 
746 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
747 {
748   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
749   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
750   PetscMUMPSInt     *row, *col;
751   const PetscScalar *av, *bv, *v1, *v2;
752   PetscScalar       *val;
753   Mat                Ad, Ao;
754   Mat_SeqAIJ        *aa;
755   Mat_SeqAIJ        *bb;
756 
757   PetscFunctionBegin;
758   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
759   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
760   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
761 
762   aa = (Mat_SeqAIJ *)(Ad)->data;
763   bb = (Mat_SeqAIJ *)(Ao)->data;
764   ai = aa->i;
765   aj = aa->j;
766   bi = bb->i;
767   bj = bb->j;
768 
769   rstart = A->rmap->rstart;
770 
771   if (reuse == MAT_INITIAL_MATRIX) {
772     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
773     PetscCall(PetscMalloc2(nz, &row, nz, &col));
774     PetscCall(PetscMalloc1(nz, &val));
775     mumps->nnz = nz;
776     mumps->irn = row;
777     mumps->jcn = col;
778     mumps->val = mumps->val_alloc = val;
779   } else {
780     val = mumps->val;
781   }
782 
783   jj   = 0;
784   irow = rstart;
785   for (i = 0; i < m; i++) {
786     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
787     countA = ai[i + 1] - ai[i];
788     countB = bi[i + 1] - bi[i];
789     bjj    = bj + bi[i];
790     v1     = av + ai[i];
791     v2     = bv + bi[i];
792 
793     /* A-part */
794     for (j = 0; j < countA; j++) {
795       if (reuse == MAT_INITIAL_MATRIX) {
796         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
797         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
798       }
799       val[jj++] = v1[j];
800     }
801 
802     /* B-part */
803     for (j = 0; j < countB; j++) {
804       if (reuse == MAT_INITIAL_MATRIX) {
805         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
806         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
807       }
808       val[jj++] = v2[j];
809     }
810     irow++;
811   }
812   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
813   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
814   PetscFunctionReturn(PETSC_SUCCESS);
815 }
816 
817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
818 {
819   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
820   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
821   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
822   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
823   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
824   const PetscInt     bs2 = mat->bs2;
825   PetscInt           bs;
826   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
827   PetscMUMPSInt     *row, *col;
828   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
829   PetscScalar       *val;
830 
831   PetscFunctionBegin;
832   PetscCall(MatGetBlockSize(A, &bs));
833   if (reuse == MAT_INITIAL_MATRIX) {
834     nz = bs2 * (aa->nz + bb->nz);
835     PetscCall(PetscMalloc2(nz, &row, nz, &col));
836     PetscCall(PetscMalloc1(nz, &val));
837     mumps->nnz = nz;
838     mumps->irn = row;
839     mumps->jcn = col;
840     mumps->val = mumps->val_alloc = val;
841   } else {
842     val = mumps->val;
843   }
844 
845   jj   = 0;
846   irow = rstart;
847   for (i = 0; i < mbs; i++) {
848     countA = ai[i + 1] - ai[i];
849     countB = bi[i + 1] - bi[i];
850     ajj    = aj + ai[i];
851     bjj    = bj + bi[i];
852     v1     = av + bs2 * ai[i];
853     v2     = bv + bs2 * bi[i];
854 
855     idx = 0;
856     /* A-part */
857     for (k = 0; k < countA; k++) {
858       for (j = 0; j < bs; j++) {
859         for (n = 0; n < bs; n++) {
860           if (reuse == MAT_INITIAL_MATRIX) {
861             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
862             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
863           }
864           val[jj++] = v1[idx++];
865         }
866       }
867     }
868 
869     idx = 0;
870     /* B-part */
871     for (k = 0; k < countB; k++) {
872       for (j = 0; j < bs; j++) {
873         for (n = 0; n < bs; n++) {
874           if (reuse == MAT_INITIAL_MATRIX) {
875             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
876             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
877           }
878           val[jj++] = v2[idx++];
879         }
880       }
881     }
882     irow += bs;
883   }
884   PetscFunctionReturn(PETSC_SUCCESS);
885 }
886 
887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
888 {
889   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
890   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
891   PetscMUMPSInt     *row, *col;
892   const PetscScalar *av, *bv, *v1, *v2;
893   PetscScalar       *val;
894   Mat                Ad, Ao;
895   Mat_SeqAIJ        *aa;
896   Mat_SeqAIJ        *bb;
897 #if defined(PETSC_USE_COMPLEX)
898   PetscBool hermitian, isset;
899 #endif
900 
901   PetscFunctionBegin;
902 #if defined(PETSC_USE_COMPLEX)
903   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
904   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
905 #endif
906   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
907   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
908   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
909 
910   aa    = (Mat_SeqAIJ *)(Ad)->data;
911   bb    = (Mat_SeqAIJ *)(Ao)->data;
912   ai    = aa->i;
913   aj    = aa->j;
914   adiag = aa->diag;
915   bi    = bb->i;
916   bj    = bb->j;
917 
918   rstart = A->rmap->rstart;
919 
920   if (reuse == MAT_INITIAL_MATRIX) {
921     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
922     nzb = 0; /* num of upper triangular entries in mat->B */
923     for (i = 0; i < m; i++) {
924       nza += (ai[i + 1] - adiag[i]);
925       countB = bi[i + 1] - bi[i];
926       bjj    = bj + bi[i];
927       for (j = 0; j < countB; j++) {
928         if (garray[bjj[j]] > rstart) nzb++;
929       }
930     }
931 
932     nz = nza + nzb; /* total nz of upper triangular part of mat */
933     PetscCall(PetscMalloc2(nz, &row, nz, &col));
934     PetscCall(PetscMalloc1(nz, &val));
935     mumps->nnz = nz;
936     mumps->irn = row;
937     mumps->jcn = col;
938     mumps->val = mumps->val_alloc = val;
939   } else {
940     val = mumps->val;
941   }
942 
943   jj   = 0;
944   irow = rstart;
945   for (i = 0; i < m; i++) {
946     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
947     v1     = av + adiag[i];
948     countA = ai[i + 1] - adiag[i];
949     countB = bi[i + 1] - bi[i];
950     bjj    = bj + bi[i];
951     v2     = bv + bi[i];
952 
953     /* A-part */
954     for (j = 0; j < countA; j++) {
955       if (reuse == MAT_INITIAL_MATRIX) {
956         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
957         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
958       }
959       val[jj++] = v1[j];
960     }
961 
962     /* B-part */
963     for (j = 0; j < countB; j++) {
964       if (garray[bjj[j]] > rstart) {
965         if (reuse == MAT_INITIAL_MATRIX) {
966           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
967           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
968         }
969         val[jj++] = v2[j];
970       }
971     }
972     irow++;
973   }
974   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
975   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
976   PetscFunctionReturn(PETSC_SUCCESS);
977 }
978 
979 PetscErrorCode MatDestroy_MUMPS(Mat A)
980 {
981   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
982 
983   PetscFunctionBegin;
984   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
985   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
986   PetscCall(VecScatterDestroy(&mumps->scat_sol));
987   PetscCall(VecDestroy(&mumps->b_seq));
988   PetscCall(VecDestroy(&mumps->x_seq));
989   PetscCall(PetscFree(mumps->id.perm_in));
990   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
991   PetscCall(PetscFree(mumps->val_alloc));
992   PetscCall(PetscFree(mumps->info));
993   PetscCall(PetscFree(mumps->ICNTL_pre));
994   PetscCall(PetscFree(mumps->CNTL_pre));
995   PetscCall(MatMumpsResetSchur_Private(mumps));
996   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
997     mumps->id.job = JOB_END;
998     PetscMUMPS_c(mumps);
999     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1000     if (mumps->mumps_comm != MPI_COMM_NULL) {
1001       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1002       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1003     }
1004   }
1005 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1006   if (mumps->use_petsc_omp_support) {
1007     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1008     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1009     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1010   }
1011 #endif
1012   PetscCall(PetscFree(mumps->ia_alloc));
1013   PetscCall(PetscFree(mumps->ja_alloc));
1014   PetscCall(PetscFree(mumps->recvcount));
1015   PetscCall(PetscFree(mumps->reqs));
1016   PetscCall(PetscFree(mumps->irhs_loc));
1017   PetscCall(PetscFree(A->data));
1018 
1019   /* clear composed functions */
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1023   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1024   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1025   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1026   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1027   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1028   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1029   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1030   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1031   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1032   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
1036 
1037 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local process. */
1038 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1039 {
1040   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1041   const PetscMPIInt ompsize = mumps->omp_comm_size;
1042   PetscInt          i, m, M, rstart;
1043 
1044   PetscFunctionBegin;
1045   PetscCall(MatGetSize(A, &M, NULL));
1046   PetscCall(MatGetLocalSize(A, &m, NULL));
1047   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1048   if (ompsize == 1) {
1049     if (!mumps->irhs_loc) {
1050       mumps->nloc_rhs = m;
1051       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1052       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1053       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1054     }
1055     mumps->id.rhs_loc = (MumpsScalar *)array;
1056   } else {
1057 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1058     const PetscInt *ranges;
1059     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1060     MPI_Group       petsc_group, omp_group;
1061     PetscScalar    *recvbuf = NULL;
1062 
1063     if (mumps->is_omp_master) {
1064       /* Lazily initialize the omp stuff for distributed rhs */
1065       if (!mumps->irhs_loc) {
1066         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1067         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1068         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1069         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1070         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1071         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1072 
1073         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1074         mumps->nloc_rhs = 0;
1075         PetscCall(MatGetOwnershipRanges(A, &ranges));
1076         for (j = 0; j < ompsize; j++) {
1077           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1078           mumps->nloc_rhs += mumps->rhs_nrow[j];
1079         }
1080         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1081         for (j = k = 0; j < ompsize; j++) {
1082           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1083         }
1084 
1085         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1086         PetscCallMPI(MPI_Group_free(&petsc_group));
1087         PetscCallMPI(MPI_Group_free(&omp_group));
1088       }
1089 
1090       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1091       if (nrhs > mumps->max_nrhs) {
1092         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1093         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1094         mumps->max_nrhs = nrhs;
1095       }
1096 
1097       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1098       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1099       mumps->rhs_disps[0] = 0;
1100       for (j = 1; j < ompsize; j++) {
1101         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1102         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1103       }
1104       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1105     }
1106 
1107     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1108     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1109 
1110     if (mumps->is_omp_master) {
1111       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1112         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1113         for (j = 0; j < ompsize; j++) {
1114           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1115           dst                    = dstbase;
1116           for (i = 0; i < nrhs; i++) {
1117             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1118             src += mumps->rhs_nrow[j];
1119             dst += mumps->nloc_rhs;
1120           }
1121           dstbase += mumps->rhs_nrow[j];
1122         }
1123       }
1124       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1125     }
1126 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1127   }
1128   mumps->id.nrhs     = nrhs;
1129   mumps->id.nloc_rhs = mumps->nloc_rhs;
1130   mumps->id.lrhs_loc = mumps->nloc_rhs;
1131   mumps->id.irhs_loc = mumps->irhs_loc;
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1136 {
1137   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1138   const PetscScalar *rarray = NULL;
1139   PetscScalar       *array;
1140   IS                 is_iden, is_petsc;
1141   PetscInt           i;
1142   PetscBool          second_solve = PETSC_FALSE;
1143   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1144 
1145   PetscFunctionBegin;
1146   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1147                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1148                                    &cite1));
1149   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1150                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1151                                    &cite2));
1152 
1153   if (A->factorerrortype) {
1154     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1155     PetscCall(VecSetInf(x));
1156     PetscFunctionReturn(PETSC_SUCCESS);
1157   }
1158 
1159   mumps->id.nrhs = 1;
1160   if (mumps->petsc_size > 1) {
1161     if (mumps->ICNTL20 == 10) {
1162       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1163       PetscCall(VecGetArrayRead(b, &rarray));
1164       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1165     } else {
1166       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1167       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1168       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1169       if (!mumps->myid) {
1170         PetscCall(VecGetArray(mumps->b_seq, &array));
1171         mumps->id.rhs = (MumpsScalar *)array;
1172       }
1173     }
1174   } else {                   /* petsc_size == 1 */
1175     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1176     PetscCall(VecCopy(b, x));
1177     PetscCall(VecGetArray(x, &array));
1178     mumps->id.rhs = (MumpsScalar *)array;
1179   }
1180 
1181   /*
1182      handle condensation step of Schur complement (if any)
1183      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1184      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1185      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1186      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1187   */
1188   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1189     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1190     second_solve = PETSC_TRUE;
1191     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1192   }
1193   /* solve phase */
1194   mumps->id.job = JOB_SOLVE;
1195   PetscMUMPS_c(mumps);
1196   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1197 
1198   /* handle expansion step of Schur complement (if any) */
1199   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1200 
1201   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1202     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1203       /* when id.ICNTL(9) changes, the contents of sol_loc/isol_loc may change (though not their size, lsol_loc), so recreate scat_sol */
1204       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1205     }
1206     if (!mumps->scat_sol) { /* create scatter scat_sol */
1207       PetscInt *isol2_loc = NULL;
1208       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1209       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1210       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1211       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1212       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1213       PetscCall(ISDestroy(&is_iden));
1214       PetscCall(ISDestroy(&is_petsc));
1215       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1216     }
1217 
1218     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1219     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1220   }
1221 
1222   if (mumps->petsc_size > 1) {
1223     if (mumps->ICNTL20 == 10) {
1224       PetscCall(VecRestoreArrayRead(b, &rarray));
1225     } else if (!mumps->myid) {
1226       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1227     }
1228   } else PetscCall(VecRestoreArray(x, &array));
1229 
1230   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1231   PetscFunctionReturn(PETSC_SUCCESS);
1232 }
1233 
1234 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1235 {
1236   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1237   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1238 
1239   PetscFunctionBegin;
1240   mumps->id.ICNTL(9) = 0;
1241   PetscCall(MatSolve_MUMPS(A, b, x));
1242   mumps->id.ICNTL(9) = value;
1243   PetscFunctionReturn(PETSC_SUCCESS);
1244 }
1245 
1246 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1247 {
1248   Mat                Bt = NULL;
1249   PetscBool          denseX, denseB, flg, flgT;
1250   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1251   PetscInt           i, nrhs, M;
1252   PetscScalar       *array;
1253   const PetscScalar *rbray;
1254   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1255   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1256   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1257   IS                 is_to, is_from;
1258   PetscInt           k, proc, j, m, myrstart;
1259   const PetscInt    *rstart;
1260   Vec                v_mpi, msol_loc;
1261   VecScatter         scat_sol;
1262   Vec                b_seq;
1263   VecScatter         scat_rhs;
1264   PetscScalar       *aa;
1265   PetscInt           spnr, *ia, *ja;
1266   Mat_MPIAIJ        *b = NULL;
1267 
1268   PetscFunctionBegin;
1269   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1270   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1271 
1272   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1273   if (denseB) {
1274     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1275     mumps->id.ICNTL(20) = 0; /* dense RHS */
1276   } else {                   /* sparse B */
1277     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1278     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1279     if (flgT) { /* input B is transpose of actual RHS matrix,
1280                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1281       PetscCall(MatTransposeGetMat(B, &Bt));
1282     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1283     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1284   }
1285 
1286   PetscCall(MatGetSize(B, &M, &nrhs));
1287   mumps->id.nrhs = nrhs;
1288   mumps->id.lrhs = M;
1289   mumps->id.rhs  = NULL;
1290 
1291   if (mumps->petsc_size == 1) {
1292     PetscScalar *aa;
1293     PetscInt     spnr, *ia, *ja;
1294     PetscBool    second_solve = PETSC_FALSE;
1295 
1296     PetscCall(MatDenseGetArray(X, &array));
1297     mumps->id.rhs = (MumpsScalar *)array;
1298 
1299     if (denseB) {
1300       /* copy B to X */
1301       PetscCall(MatDenseGetArrayRead(B, &rbray));
1302       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1303       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1304     } else { /* sparse B */
1305       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1306       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1307       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1308       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1309       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1310     }
1311     /* handle condensation step of Schur complement (if any) */
1312     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1313       second_solve = PETSC_TRUE;
1314       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1315     }
1316     /* solve phase */
1317     mumps->id.job = JOB_SOLVE;
1318     PetscMUMPS_c(mumps);
1319     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1320 
1321     /* handle expansion step of Schur complement (if any) */
1322     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1323     if (!denseB) { /* sparse B */
1324       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1325       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1326       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1327     }
1328     PetscCall(MatDenseRestoreArray(X, &array));
1329     PetscFunctionReturn(PETSC_SUCCESS);
1330   }
1331 
1332   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1333   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1334 
1335   /* create msol_loc to hold mumps local solution */
1336   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1337   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1338 
1339   lsol_loc  = mumps->id.lsol_loc;
1340   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1341   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1342   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1343   mumps->id.isol_loc = isol_loc;
1344 
1345   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1346 
1347   if (denseB) {
1348     if (mumps->ICNTL20 == 10) {
1349       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1350       PetscCall(MatDenseGetArrayRead(B, &rbray));
1351       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1352       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1353       PetscCall(MatGetLocalSize(B, &m, NULL));
1354       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1355     } else {
1356       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1357       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as a single
1358         MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather
1359         B to rank 0, then on rank 0 re-arrange B into the desired order, which is a local operation.
1360       */
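      /* A minimal sketch of that optimization (not implemented here; variable names are illustrative only).
         Once v_mpi has been created below, VecScatterCreateToZero() builds the gather-to-rank-0 scatter in one call:

           Vec        b_all;
           VecScatter togather;
           PetscCall(VecScatterCreateToZero(v_mpi, &togather, &b_all));
           PetscCall(VecScatterBegin(togather, v_mpi, b_all, INSERT_VALUES, SCATTER_FORWARD));
           PetscCall(VecScatterEnd(togather, v_mpi, b_all, INSERT_VALUES, SCATTER_FORWARD));
           // on rank 0, permute b_all into the j*M+i ordering expected by MUMPS (a purely local step)

         VecScatterCreateToZero() and VecScatterBegin/End() are existing PETSc API; only the final permutation
         step would need new code. */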
1361 
1362       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1363       /* wrap dense rhs matrix B into a vector v_mpi */
1364       PetscCall(MatGetLocalSize(B, &m, NULL));
1365       PetscCall(MatDenseGetArray(B, &bray));
1366       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1367       PetscCall(MatDenseRestoreArray(B, &bray));
1368 
1369       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1370       if (!mumps->myid) {
1371         PetscInt *idx;
1372         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1373         PetscCall(PetscMalloc1(nrhs * M, &idx));
1374         PetscCall(MatGetOwnershipRanges(B, &rstart));
1375         k = 0;
1376         for (proc = 0; proc < mumps->petsc_size; proc++) {
1377           for (j = 0; j < nrhs; j++) {
1378             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1379           }
1380         }
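        /* For example (illustrative numbers only): with 2 ranks owning 2 rows of B each (rstart = {0,2,4}),
           M = 4 and nrhs = 2, the loops above produce idx = {0,1,4,5, 2,3,6,7}, i.e. the k-th entry of the
           gathered v_mpi (rank by rank, all nrhs columns of that rank's rows) lands at position j*M+i of the
           column-major centralized rhs. */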
1381 
1382         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1383         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1384         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1385       } else {
1386         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1387         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1388         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1389       }
1390       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1391       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1392       PetscCall(ISDestroy(&is_to));
1393       PetscCall(ISDestroy(&is_from));
1394       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1395 
1396       if (!mumps->myid) { /* define rhs on the host */
1397         PetscCall(VecGetArray(b_seq, &bray));
1398         mumps->id.rhs = (MumpsScalar *)bray;
1399         PetscCall(VecRestoreArray(b_seq, &bray));
1400       }
1401     }
1402   } else { /* sparse B */
1403     b = (Mat_MPIAIJ *)Bt->data;
1404 
1405     /* wrap dense X into a vector v_mpi */
1406     PetscCall(MatGetLocalSize(X, &m, NULL));
1407     PetscCall(MatDenseGetArray(X, &bray));
1408     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1409     PetscCall(MatDenseRestoreArray(X, &bray));
1410 
1411     if (!mumps->myid) {
1412       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1413       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1414       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1415       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1416       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1417     } else {
1418       mumps->id.irhs_ptr    = NULL;
1419       mumps->id.irhs_sparse = NULL;
1420       mumps->id.nz_rhs      = 0;
1421       mumps->id.rhs_sparse  = NULL;
1422     }
1423   }
1424 
1425   /* solve phase */
1426   mumps->id.job = JOB_SOLVE;
1427   PetscMUMPS_c(mumps);
1428   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1429 
1430   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1431   PetscCall(MatDenseGetArray(X, &array));
1432   PetscCall(VecPlaceArray(v_mpi, array));
1433 
1434   /* create scatter scat_sol */
1435   PetscCall(MatGetOwnershipRanges(X, &rstart));
1436   /* iidx: index for scatter mumps solution to petsc X */
1437 
1438   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1439   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1440   for (i = 0; i < lsol_loc; i++) {
1441     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1442 
1443     for (proc = 0; proc < mumps->petsc_size; proc++) {
1444       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1445         myrstart = rstart[proc];
1446         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1447         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1448         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1449         break;
1450       }
1451     }
1452 
1453     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1454   }
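  /* Worked example (illustrative numbers only): with 2 ranks owning 2 rows of X each (rstart = {0,2,4})
     and nrhs = 2, a MUMPS entry with (0-based) global row isol_loc[i] = 3 lives on rank 1, so myrstart = 2,
     k = 1, iidx = 1 + 2*2 = 5 and m = 2; the loop above then maps it to positions 5 (j = 0) and 7 (j = 1)
     of v_mpi, matching the per-rank column-major layout that v_mpi shares with X. */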
1455   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1456   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1457   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1458   PetscCall(ISDestroy(&is_from));
1459   PetscCall(ISDestroy(&is_to));
1460   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1461   PetscCall(MatDenseRestoreArray(X, &array));
1462 
1463   /* free spaces */
1464   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1465   mumps->id.isol_loc = isol_loc_save;
1466 
1467   PetscCall(PetscFree2(sol_loc, isol_loc));
1468   PetscCall(PetscFree(idxx));
1469   PetscCall(VecDestroy(&msol_loc));
1470   PetscCall(VecDestroy(&v_mpi));
1471   if (!denseB) {
1472     if (!mumps->myid) {
1473       b = (Mat_MPIAIJ *)Bt->data;
1474       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1475       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1476       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1477     }
1478   } else {
1479     if (mumps->ICNTL20 == 0) {
1480       PetscCall(VecDestroy(&b_seq));
1481       PetscCall(VecScatterDestroy(&scat_rhs));
1482     }
1483   }
1484   PetscCall(VecScatterDestroy(&scat_sol));
1485   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1486   PetscFunctionReturn(PETSC_SUCCESS);
1487 }
1488 
1489 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1490 {
1491   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1492   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1493 
1494   PetscFunctionBegin;
1495   mumps->id.ICNTL(9) = 0;
1496   PetscCall(MatMatSolve_MUMPS(A, B, X));
1497   mumps->id.ICNTL(9) = value;
1498   PetscFunctionReturn(PETSC_SUCCESS);
1499 }
1500 
1501 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1502 {
1503   PetscBool flg;
1504   Mat       B;
1505 
1506   PetscFunctionBegin;
1507   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1508   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1509 
1510   /* Create B=Bt^T that uses Bt's data structure */
1511   PetscCall(MatCreateTranspose(Bt, &B));
1512 
1513   PetscCall(MatMatSolve_MUMPS(A, B, X));
1514   PetscCall(MatDestroy(&B));
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
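/* A minimal usage sketch (illustrative only, not part of this file's logic): callers pass the transpose of the
   sparse right-hand-side matrix so that MUMPS effectively receives compressed-column storage, e.g.

     Mat F, Bt, X;                              // F: factored matrix (e.g. from PCFactorGetMatrix()),
                                                // Bt: AIJ matrix holding the rhs transposed, X: MATDENSE
     PetscCall(MatMatTransposeSolve(F, Bt, X)); // solves A * X = Bt^T

   MatMatTransposeSolve() is the public PETSc routine that dispatches to MatMatTransposeSolve_MUMPS() above. */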
1517 
1518 #if !defined(PETSC_USE_COMPLEX)
1519 /*
1520   input:
1521    F:        numeric factor
1522   output:
1523    nneg:     total number of negative pivots
1524    nzero:    total number of zero pivots
1525    npos:     (global dimension of F) - nneg - nzero
1526 */
1527 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1528 {
1529   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1530   PetscMPIInt size;
1531 
1532   PetscFunctionBegin;
1533   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1534   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1535   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1536 
1537   if (nneg) *nneg = mumps->id.INFOG(12);
1538   if (nzero || npos) {
1539     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1540     if (nzero) *nzero = mumps->id.INFOG(28);
1541     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1542   }
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 #endif
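/* A minimal usage sketch (illustrative only): the inertia computed above is normally queried through the
   public API on a Cholesky factor, with the options required by the checks above, e.g.

     // run with: -pc_type cholesky -pc_factor_mat_solver_type mumps -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1
     Mat      F;                      // factored matrix, e.g. obtained from PCFactorGetMatrix()
     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));

   MatGetInertia() is the existing PETSc entry point that dispatches to MatGetInertia_SBAIJMUMPS(). */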
1546 
1547 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1548 {
1549   PetscInt       i, nreqs;
1550   PetscMUMPSInt *irn, *jcn;
1551   PetscMPIInt    count;
1552   PetscInt64     totnnz, remain;
1553   const PetscInt osize = mumps->omp_comm_size;
1554   PetscScalar   *val;
1555 
1556   PetscFunctionBegin;
1557   if (osize > 1) {
1558     if (reuse == MAT_INITIAL_MATRIX) {
1559       /* master first gathers counts of nonzeros to receive */
1560       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1561       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1562 
1563       /* Then each computes number of send/recvs */
1564       if (mumps->is_omp_master) {
1565         /* Start from 1 since self communication is not done in MPI */
1566         nreqs = 0;
1567         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1568       } else {
1569         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1570       }
1571       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1572 
1573       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1574          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1575          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1576          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1577        */
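      /* For example (illustrative numbers only): a rank contributing nnz = 5e9 entries with
         PETSC_MPI_INT_MAX = 2^31-1 needs ceil(5e9/(2^31-1)) = 3 slices, and since irn, jcn and val are
         sent separately it posts 3*3 = 9 nonblocking messages in the loops below. */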
1578       nreqs = 0; /* counter for actual send/recvs */
1579       if (mumps->is_omp_master) {
1580         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1581         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1582         PetscCall(PetscMalloc1(totnnz, &val));
1583 
1584         /* Self communication */
1585         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1586         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1587         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1588 
1589         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1590         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1591         PetscCall(PetscFree(mumps->val_alloc));
1592         mumps->nnz = totnnz;
1593         mumps->irn = irn;
1594         mumps->jcn = jcn;
1595         mumps->val = mumps->val_alloc = val;
1596 
1597         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1598         jcn += mumps->recvcount[0];
1599         val += mumps->recvcount[0];
1600 
1601         /* Remote communication */
1602         for (i = 1; i < osize; i++) {
1603           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1604           remain = mumps->recvcount[i] - count;
1605           while (count > 0) {
1606             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1607             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1608             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1609             irn += count;
1610             jcn += count;
1611             val += count;
1612             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1613             remain -= count;
1614           }
1615         }
1616       } else {
1617         irn    = mumps->irn;
1618         jcn    = mumps->jcn;
1619         val    = mumps->val;
1620         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1621         remain = mumps->nnz - count;
1622         while (count > 0) {
1623           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1624           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1625           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1626           irn += count;
1627           jcn += count;
1628           val += count;
1629           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1630           remain -= count;
1631         }
1632       }
1633     } else {
1634       nreqs = 0;
1635       if (mumps->is_omp_master) {
1636         val = mumps->val + mumps->recvcount[0];
1637         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1638           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1639           remain = mumps->recvcount[i] - count;
1640           while (count > 0) {
1641             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1642             val += count;
1643             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1644             remain -= count;
1645           }
1646         }
1647       } else {
1648         val    = mumps->val;
1649         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1650         remain = mumps->nnz - count;
1651         while (count > 0) {
1652           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1653           val += count;
1654           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1655           remain -= count;
1656         }
1657       }
1658     }
1659     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1660     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1661   }
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1666 {
1667   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1668   PetscBool  isMPIAIJ;
1669 
1670   PetscFunctionBegin;
1671   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* exclude the empty-matrix case INFOG(1)=-16, INFO(2)=0 */
1672     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1673     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1674     PetscFunctionReturn(PETSC_SUCCESS);
1675   }
1676 
1677   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1678   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1679 
1680   /* numerical factorization phase */
1681   mumps->id.job = JOB_FACTNUMERIC;
1682   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1683     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1684   } else {
1685     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1686   }
1687   PetscMUMPS_c(mumps);
1688   if (mumps->id.INFOG(1) < 0) {
1689     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1690     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1691       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1693     } else if (mumps->id.INFOG(1) == -13) {
1694       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1695       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1696     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1697       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1698       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1699     } else {
1700       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1701       F->factorerrortype = MAT_FACTOR_OTHER;
1702     }
1703   }
1704   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1705 
1706   F->assembled = PETSC_TRUE;
1707 
1708   if (F->schur) { /* reset Schur status to unfactored */
1709 #if defined(PETSC_HAVE_CUDA)
1710     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711 #endif
1712     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713       mumps->id.ICNTL(19) = 2;
1714       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1715     }
1716     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1717   }
1718 
1719   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1720   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1721 
1722   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723   if (mumps->petsc_size > 1) {
1724     PetscInt     lsol_loc;
1725     PetscScalar *sol_loc;
1726 
1727     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1728 
1729     /* distributed solution; Create x_seq=sol_loc for repeated use */
1730     if (mumps->x_seq) {
1731       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1732       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1733       PetscCall(VecDestroy(&mumps->x_seq));
1734     }
1735     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1736     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1737     mumps->id.lsol_loc = lsol_loc;
1738     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1739     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1740   }
1741   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1742   PetscFunctionReturn(PETSC_SUCCESS);
1743 }
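/* A minimal sketch (illustrative only) of how callers can query the error classification recorded above:

     MatFactorError err;
     PetscCall(MatFactorGetError(F, &err));
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) { } // the matrix was numerically singular

   MatFactorGetError() is existing PETSc API; the branch shown is only an example of acting on the result. */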
1744 
1745 /* Sets MUMPS options from the options database */
1746 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1747 {
1748   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1749   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1750   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1751   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1752   MumpsScalar  *arr;
1753 
1754   PetscFunctionBegin;
1755   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1756   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1757     PetscInt nthreads   = 0;
1758     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1759     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1760 
1761     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1762     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1763     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1764 
1765     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1766     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1767     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1768     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1769     if (mumps->use_petsc_omp_support) {
1770       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1771                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1772       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1773 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1774       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1775       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1776 #endif
1777     } else {
1778       mumps->omp_comm      = PETSC_COMM_SELF;
1779       mumps->mumps_comm    = mumps->petsc_comm;
1780       mumps->is_omp_master = PETSC_TRUE;
1781     }
1782     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1783     mumps->reqs = NULL;
1784     mumps->tag  = 0;
1785 
1786     if (mumps->mumps_comm != MPI_COMM_NULL) {
1787       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1788         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1789         MPI_Comm comm;
1790         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1791         mumps->mumps_comm = comm;
1792       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1793     }
1794 
1795     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1796     mumps->id.job          = JOB_INIT;
1797     mumps->id.par          = 1; /* host participates in factorization and solve */
1798     mumps->id.sym          = mumps->sym;
1799 
1800     size          = mumps->id.size_schur;
1801     arr           = mumps->id.schur;
1802     listvar_schur = mumps->id.listvar_schur;
1803     PetscMUMPS_c(mumps);
1804     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1805     /* restore cached ICNTL and CNTL values */
1806     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1807     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1808     PetscCall(PetscFree(mumps->ICNTL_pre));
1809     PetscCall(PetscFree(mumps->CNTL_pre));
1810 
1811     if (schur) {
1812       mumps->id.size_schur    = size;
1813       mumps->id.schur_lld     = size;
1814       mumps->id.schur         = arr;
1815       mumps->id.listvar_schur = listvar_schur;
1816       if (mumps->petsc_size > 1) {
1817         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1818 
1819         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1820         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1821         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1822         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1823       } else {
1824         if (F->factortype == MAT_FACTOR_LU) {
1825           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1826         } else {
1827           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1828         }
1829       }
1830       mumps->id.ICNTL(26) = -1;
1831     }
1832 
1833     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1834        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1835      */
1836     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1837     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1838 
1839     mumps->scat_rhs = NULL;
1840     mumps->scat_sol = NULL;
1841 
1842     /* set PETSc-MUMPS default options - override MUMPS default */
1843     mumps->id.ICNTL(3) = 0;
1844     mumps->id.ICNTL(4) = 0;
1845     if (mumps->petsc_size == 1) {
1846       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1847       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1848     } else {
1849       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1850       mumps->id.ICNTL(21) = 1; /* distributed solution */
1851     }
1852   }
1853   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1854   if (flg) mumps->id.ICNTL(1) = icntl;
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1856   if (flg) mumps->id.ICNTL(2) = icntl;
1857   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1858   if (flg) mumps->id.ICNTL(3) = icntl;
1859 
1860   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1861   if (flg) mumps->id.ICNTL(4) = icntl;
1862   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1863 
1864   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1865   if (flg) mumps->id.ICNTL(6) = icntl;
1866 
1867   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1868   if (flg) {
1869     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1870     mumps->id.ICNTL(7) = icntl;
1871   }
1872 
1873   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1874   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1875   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1876   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1877   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1878   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1879   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1880   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1881   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1882   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1883   if (flg) {
1884     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1885     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1886   }
1887   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1888   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1889     PetscCall(MatDestroy(&F->schur));
1890     PetscCall(MatMumpsResetSchur_Private(mumps));
1891   }
1892 
1893   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1894      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1895      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1896      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1897      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1898      In short, we could not use distributed RHS with MPICH until v4.0b1.
1899    */
1900 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1901   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1902 #else
1903   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1904 #endif
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1906   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1907 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1908   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1909 #endif
1910   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1911 
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1915   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1916 
1917   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1922   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1923   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1924   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1925   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1926   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1927   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1928   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1929 
1930   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1931   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1932   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1933   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1934   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1935   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1936 
1937   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1938 
1939   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1940   if (ninfo) {
1941     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1942     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1943     mumps->ninfo = ninfo;
1944     for (i = 0; i < ninfo; i++) {
1945       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1946       mumps->info[i] = info[i];
1947     }
1948   }
1949   PetscOptionsEnd();
1950   PetscFunctionReturn(PETSC_SUCCESS);
1951 }
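/* The options handled above are normally supplied on the command line, e.g. (illustrative values only)

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 40 -mat_mumps_icntl_24 1 -mat_mumps_cntl_1 0.01

   or programmatically through the public accessors, e.g.

     PetscCall(MatMumpsSetIcntl(F, 14, 40));  // same effect as -mat_mumps_icntl_14 40
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));  // same effect as -mat_mumps_cntl_1 0.01

   MatMumpsSetIcntl()/MatMumpsSetCntl() are the existing PETSc entry points for these controls. */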
1952 
1953 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1954 {
1955   PetscFunctionBegin;
1956   if (mumps->id.INFOG(1) < 0) {
1957     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1958     if (mumps->id.INFOG(1) == -6) {
1959       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1960       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1961     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1962       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1964     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 means N is out of range; INFO(2) carries N */
1965       PetscCall(PetscInfo(F, "Empty matrix\n"));
1966     } else {
1967       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1968       F->factorerrortype = MAT_FACTOR_OTHER;
1969     }
1970   }
1971   PetscFunctionReturn(PETSC_SUCCESS);
1972 }
1973 
1974 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1975 {
1976   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1977   Vec            b;
1978   const PetscInt M = A->rmap->N;
1979 
1980   PetscFunctionBegin;
1981   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1982     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1983     PetscFunctionReturn(PETSC_SUCCESS);
1984   }
1985 
1986   /* Set MUMPS options from the options database */
1987   PetscCall(MatSetFromOptions_MUMPS(F, A));
1988 
1989   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1990   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1991 
1992   /* analysis phase */
1993   mumps->id.job = JOB_FACTSYMBOLIC;
1994   mumps->id.n   = M;
1995   switch (mumps->id.ICNTL(18)) {
1996   case 0: /* centralized assembled matrix input */
1997     if (!mumps->myid) {
1998       mumps->id.nnz = mumps->nnz;
1999       mumps->id.irn = mumps->irn;
2000       mumps->id.jcn = mumps->jcn;
2001       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2002       if (r) {
2003         mumps->id.ICNTL(7) = 1;
2004         if (!mumps->myid) {
2005           const PetscInt *idx;
2006           PetscInt        i;
2007 
2008           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2009           PetscCall(ISGetIndices(r, &idx));
2010           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2011           PetscCall(ISRestoreIndices(r, &idx));
2012         }
2013       }
2014     }
2015     break;
2016   case 3: /* distributed assembled matrix input (size>1) */
2017     mumps->id.nnz_loc = mumps->nnz;
2018     mumps->id.irn_loc = mumps->irn;
2019     mumps->id.jcn_loc = mumps->jcn;
2020     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2021     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2022       PetscCall(MatCreateVecs(A, NULL, &b));
2023       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2024       PetscCall(VecDestroy(&b));
2025     }
2026     break;
2027   }
2028   PetscMUMPS_c(mumps);
2029   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2030 
2031   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2032   F->ops->solve             = MatSolve_MUMPS;
2033   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2034   F->ops->matsolve          = MatMatSolve_MUMPS;
2035   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2036   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2037 
2038   mumps->matstruc = SAME_NONZERO_PATTERN;
2039   PetscFunctionReturn(PETSC_SUCCESS);
2040 }
2041 
2042 /* Note the Petsc r and c permutations are ignored */
2043 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2044 {
2045   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2046   Vec            b;
2047   const PetscInt M = A->rmap->N;
2048 
2049   PetscFunctionBegin;
2050   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2051     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2052     PetscFunctionReturn(PETSC_SUCCESS);
2053   }
2054 
2055   /* Set MUMPS options from the options database */
2056   PetscCall(MatSetFromOptions_MUMPS(F, A));
2057 
2058   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2059   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2060 
2061   /* analysis phase */
2062   mumps->id.job = JOB_FACTSYMBOLIC;
2063   mumps->id.n   = M;
2064   switch (mumps->id.ICNTL(18)) {
2065   case 0: /* centralized assembled matrix input */
2066     if (!mumps->myid) {
2067       mumps->id.nnz = mumps->nnz;
2068       mumps->id.irn = mumps->irn;
2069       mumps->id.jcn = mumps->jcn;
2070       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2071     }
2072     break;
2073   case 3: /* distributed assembled matrix input (size>1) */
2074     mumps->id.nnz_loc = mumps->nnz;
2075     mumps->id.irn_loc = mumps->irn;
2076     mumps->id.jcn_loc = mumps->jcn;
2077     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2078     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2079       PetscCall(MatCreateVecs(A, NULL, &b));
2080       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2081       PetscCall(VecDestroy(&b));
2082     }
2083     break;
2084   }
2085   PetscMUMPS_c(mumps);
2086   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2087 
2088   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2089   F->ops->solve             = MatSolve_MUMPS;
2090   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2091   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2092 
2093   mumps->matstruc = SAME_NONZERO_PATTERN;
2094   PetscFunctionReturn(PETSC_SUCCESS);
2095 }
2096 
2097 /* Note the Petsc r permutation and factor info are ignored */
2098 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2099 {
2100   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2101   Vec            b;
2102   const PetscInt M = A->rmap->N;
2103 
2104   PetscFunctionBegin;
2105   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2106     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2107     PetscFunctionReturn(PETSC_SUCCESS);
2108   }
2109 
2110   /* Set MUMPS options from the options database */
2111   PetscCall(MatSetFromOptions_MUMPS(F, A));
2112 
2113   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2114   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2115 
2116   /* analysis phase */
2117   mumps->id.job = JOB_FACTSYMBOLIC;
2118   mumps->id.n   = M;
2119   switch (mumps->id.ICNTL(18)) {
2120   case 0: /* centralized assembled matrix input */
2121     if (!mumps->myid) {
2122       mumps->id.nnz = mumps->nnz;
2123       mumps->id.irn = mumps->irn;
2124       mumps->id.jcn = mumps->jcn;
2125       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2126     }
2127     break;
2128   case 3: /* distributed assembled matrix input (size>1) */
2129     mumps->id.nnz_loc = mumps->nnz;
2130     mumps->id.irn_loc = mumps->irn;
2131     mumps->id.jcn_loc = mumps->jcn;
2132     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2133     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2134       PetscCall(MatCreateVecs(A, NULL, &b));
2135       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2136       PetscCall(VecDestroy(&b));
2137     }
2138     break;
2139   }
2140   PetscMUMPS_c(mumps);
2141   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2142 
2143   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2144   F->ops->solve                 = MatSolve_MUMPS;
2145   F->ops->solvetranspose        = MatSolve_MUMPS;
2146   F->ops->matsolve              = MatMatSolve_MUMPS;
2147   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2148   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2149 #if defined(PETSC_USE_COMPLEX)
2150   F->ops->getinertia = NULL;
2151 #else
2152   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2153 #endif
2154 
2155   mumps->matstruc = SAME_NONZERO_PATTERN;
2156   PetscFunctionReturn(PETSC_SUCCESS);
2157 }
2158 
2159 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2160 {
2161   PetscBool         iascii;
2162   PetscViewerFormat format;
2163   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2164 
2165   PetscFunctionBegin;
2166   /* check if matrix is mumps type */
2167   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2168 
2169   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2170   if (iascii) {
2171     PetscCall(PetscViewerGetFormat(viewer, &format));
2172     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2173       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2174       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2175         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2176         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2177         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2178         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2179         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2180         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2181         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2182         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2183         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering): %d\n", mumps->id.ICNTL(7)));
2184         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2185         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2186         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2187         if (mumps->id.ICNTL(11) > 0) {
2188           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2189           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2190           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2191           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2192           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2193           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2194         }
2195         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2196         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2197         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2198         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2199         /* ICNTL(16-17) not used */
2200         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2201         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2202         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2203         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2204         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2205         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2206 
2207         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2208         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2209         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2210         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2211         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2212         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2213 
2214         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2215         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors that may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
2216         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2217         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2218         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2219         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2220 
2221         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2222         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2223         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2227 
2228         /* information local to each processor */
2229         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2230         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2231         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2232         PetscCall(PetscViewerFlush(viewer));
2233         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2234         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2235         PetscCall(PetscViewerFlush(viewer));
2236         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2237         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2238         PetscCall(PetscViewerFlush(viewer));
2239 
2240         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2241         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2242         PetscCall(PetscViewerFlush(viewer));
2243 
2244         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2245         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2246         PetscCall(PetscViewerFlush(viewer));
2247 
2248         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2249         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2250         PetscCall(PetscViewerFlush(viewer));
2251 
2252         if (mumps->ninfo && mumps->ninfo <= 80) {
2253           PetscInt i;
2254           for (i = 0; i < mumps->ninfo; i++) {
2255             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2256             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2257             PetscCall(PetscViewerFlush(viewer));
2258           }
2259         }
2260         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2261       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2262 
2263       if (mumps->myid == 0) { /* information from the host */
2264         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2266         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2268 
2269         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2294         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2295         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2296         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2297         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2298         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2299         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2300         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2301         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2302         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2303       }
2304     }
2305   }
2306   PetscFunctionReturn(PETSC_SUCCESS);
2307 }
2308 
2309 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2310 {
2311   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2312 
2313   PetscFunctionBegin;
2314   info->block_size        = 1.0;
2315   info->nz_allocated      = mumps->id.INFOG(20);
2316   info->nz_used           = mumps->id.INFOG(20);
2317   info->nz_unneeded       = 0.0;
2318   info->assemblies        = 0.0;
2319   info->mallocs           = 0.0;
2320   info->memory            = 0.0;
2321   info->fill_ratio_given  = 0;
2322   info->fill_ratio_needed = 0;
2323   info->factor_mallocs    = 0;
2324   PetscFunctionReturn(PETSC_SUCCESS);
2325 }
2326 
2327 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2328 {
2329   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2330   const PetscScalar *arr;
2331   const PetscInt    *idxs;
2332   PetscInt           size, i;
2333 
2334   PetscFunctionBegin;
2335   PetscCall(ISGetLocalSize(is, &size));
2336   /* Schur complement matrix */
2337   PetscCall(MatDestroy(&F->schur));
2338   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2339   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2340   mumps->id.schur      = (MumpsScalar *)arr;
2341   mumps->id.size_schur = size;
2342   mumps->id.schur_lld  = size;
2343   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2344   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2345 
2346   /* MUMPS expects Fortran style indices */
2347   PetscCall(PetscFree(mumps->id.listvar_schur));
2348   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2349   PetscCall(ISGetIndices(is, &idxs));
2350   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2351   PetscCall(ISRestoreIndices(is, &idxs));
2352   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2353   mumps->id.ICNTL(26) = -1;
2354   PetscFunctionReturn(PETSC_SUCCESS);
2355 }
2356 
2357 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2358 {
2359   Mat          St;
2360   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2361   PetscScalar *array;
2362 #if defined(PETSC_USE_COMPLEX)
2363   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2364 #endif
2365 
2366   PetscFunctionBegin;
2367   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2368   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2369   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2370   PetscCall(MatSetType(St, MATDENSE));
2371   PetscCall(MatSetUp(St));
2372   PetscCall(MatDenseGetArray(St, &array));
2373   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2374     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2375       PetscInt i, j, N = mumps->id.size_schur;
2376       for (i = 0; i < N; i++) {
2377         for (j = 0; j < N; j++) {
2378 #if !defined(PETSC_USE_COMPLEX)
2379           PetscScalar val = mumps->id.schur[i * N + j];
2380 #else
2381           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2382 #endif
2383           array[j * N + i] = val;
2384         }
2385       }
2386     } else { /* stored by columns */
2387       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2388     }
2389   } else {                          /* either full or lower-triangular (not packed) */
2390     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2391       PetscInt i, j, N = mumps->id.size_schur;
2392       for (i = 0; i < N; i++) {
2393         for (j = i; j < N; j++) {
2394 #if !defined(PETSC_USE_COMPLEX)
2395           PetscScalar val = mumps->id.schur[i * N + j];
2396 #else
2397           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2398 #endif
2399           array[i * N + j] = val;
2400           array[j * N + i] = val;
2401         }
2402       }
2403     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2404       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2405     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2406       PetscInt i, j, N = mumps->id.size_schur;
2407       for (i = 0; i < N; i++) {
2408         for (j = 0; j < i + 1; j++) {
2409 #if !defined(PETSC_USE_COMPLEX)
2410           PetscScalar val = mumps->id.schur[i * N + j];
2411 #else
2412           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2413 #endif
2414           array[i * N + j] = val;
2415           array[j * N + i] = val;
2416         }
2417       }
2418     }
2419   }
2420   PetscCall(MatDenseRestoreArray(St, &array));
2421   *S = St;
2422   PetscFunctionReturn(PETSC_SUCCESS);
2423 }
2424 
2425 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2426 {
2427   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2428 
2429   PetscFunctionBegin;
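  /* ICNTL_pre caches (icntl, ival) pairs set before MUMPS has been initialized: ICNTL_pre[0] holds the
     number of cached pairs, and pair i is stored at ICNTL_pre[1 + 2*i] (index) and ICNTL_pre[2 + 2*i] (value) */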
2430   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2431     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2432     for (i = 0; i < nICNTL_pre; ++i)
2433       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2434     if (i == nICNTL_pre) {                             /* not already cached */
2435       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2436       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2437       mumps->ICNTL_pre[0]++;
2438     }
2439     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2440     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2441   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2442   PetscFunctionReturn(PETSC_SUCCESS);
2443 }
2444 
2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2446 {
2447   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2448 
2449   PetscFunctionBegin;
2450   if (mumps->id.job == JOB_NULL) {
2451     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2452     *ival = 0;
2453     for (i = 0; i < nICNTL_pre; ++i) {
2454       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2455     }
2456   } else *ival = mumps->id.ICNTL(icntl);
2457   PetscFunctionReturn(PETSC_SUCCESS);
2458 }
2459 
2460 /*@
2461   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2462 
2463    Logically Collective
2464 
2465    Input Parameters:
2466 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2467 .  icntl - index of MUMPS parameter array ICNTL()
2468 -  ival - value of MUMPS ICNTL(icntl)
2469 
2470   Options Database Key:
2471 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2472 
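  Example Usage:
  A minimal sketch (assuming `F` was obtained with `MatGetFactor()` using `MATSOLVERMUMPS`); raise ICNTL(14), the percentage increase in the estimated working space
.vb
  PetscCall(MatMumpsSetIcntl(F, 14, 30));
.ve
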
2473    Level: beginner
2474 
2475    References:
2476 .  * - MUMPS Users' Guide
2477 
2478 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2479 @*/
2480 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2481 {
2482   PetscFunctionBegin;
2483   PetscValidType(F, 1);
2484   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2485   PetscValidLogicalCollectiveInt(F, icntl, 2);
2486   PetscValidLogicalCollectiveInt(F, ival, 3);
2487   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2488   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2489   PetscFunctionReturn(PETSC_SUCCESS);
2490 }
2491 
2492 /*@
2493   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2494 
2495    Logically Collective
2496 
2497    Input Parameters:
2498 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2499 -  icntl - index of MUMPS parameter array ICNTL()
2500 
2501   Output Parameter:
2502 .  ival - value of MUMPS ICNTL(icntl)
2503 
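  Example Usage:
  A minimal sketch (assuming `F` is a MUMPS factor); query whether null pivot row detection, ICNTL(24), is enabled
.vb
  PetscInt ival;
  PetscCall(MatMumpsGetIcntl(F, 24, &ival));
.ve
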
2504    Level: beginner
2505 
2506    References:
2507 .  * - MUMPS Users' Guide
2508 
2509 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2510 @*/
2511 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2512 {
2513   PetscFunctionBegin;
2514   PetscValidType(F, 1);
2515   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2516   PetscValidLogicalCollectiveInt(F, icntl, 2);
2517   PetscValidIntPointer(ival, 3);
2518   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2519   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2520   PetscFunctionReturn(PETSC_SUCCESS);
2521 }
2522 
2523 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2524 {
2525   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2526 
2527   PetscFunctionBegin;
2528   if (mumps->id.job == JOB_NULL) {
2529     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2530     for (i = 0; i < nCNTL_pre; ++i)
2531       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2532     if (i == nCNTL_pre) {
2533       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2534       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2535       mumps->CNTL_pre[0]++;
2536     }
2537     mumps->CNTL_pre[1 + 2 * i] = icntl;
2538     mumps->CNTL_pre[2 + 2 * i] = val;
2539   } else mumps->id.CNTL(icntl) = val;
2540   PetscFunctionReturn(PETSC_SUCCESS);
2541 }
2542 
2543 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2544 {
2545   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2546 
2547   PetscFunctionBegin;
2548   if (mumps->id.job == JOB_NULL) {
2549     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2550     *val = 0.0;
2551     for (i = 0; i < nCNTL_pre; ++i) {
2552       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2553     }
2554   } else *val = mumps->id.CNTL(icntl);
2555   PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557 
2558 /*@
2559   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2560 
2561    Logically Collective
2562 
2563    Input Parameters:
2564 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2565 .  icntl - index of MUMPS parameter array CNTL()
2566 -  val - value of MUMPS CNTL(icntl)
2567 
2568   Options Database Key:
2569 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2570 
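  Example Usage:
  A minimal sketch (assuming `F` was obtained with `MatGetFactor()` using `MATSOLVERMUMPS`); set CNTL(1), the relative pivoting threshold
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.01));
.ve
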
2571    Level: beginner
2572 
2573    References:
2574 .  * - MUMPS Users' Guide
2575 
2576 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2577 @*/
2578 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2579 {
2580   PetscFunctionBegin;
2581   PetscValidType(F, 1);
2582   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2583   PetscValidLogicalCollectiveInt(F, icntl, 2);
2584   PetscValidLogicalCollectiveReal(F, val, 3);
2585   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2586   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 /*@
2591   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2592 
2593    Logically Collective
2594 
2595    Input Parameters:
2596 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2597 -  icntl - index of MUMPS parameter array CNTL()
2598 
2599   Output Parameter:
2600 .  val - value of MUMPS CNTL(icntl)
2601 
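  Example Usage:
  A minimal sketch (assuming `F` is a MUMPS factor); query CNTL(1), the relative pivoting threshold
.vb
  PetscReal cntl1;
  PetscCall(MatMumpsGetCntl(F, 1, &cntl1));
.ve
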
2602    Level: beginner
2603 
2604    References:
2605 .  * - MUMPS Users' Guide
2606 
2607 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2608 @*/
2609 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2610 {
2611   PetscFunctionBegin;
2612   PetscValidType(F, 1);
2613   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2614   PetscValidLogicalCollectiveInt(F, icntl, 2);
2615   PetscValidRealPointer(val, 3);
2616   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2617   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2618   PetscFunctionReturn(PETSC_SUCCESS);
2619 }
2620 
2621 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2622 {
2623   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2624 
2625   PetscFunctionBegin;
2626   *info = mumps->id.INFO(icntl);
2627   PetscFunctionReturn(PETSC_SUCCESS);
2628 }
2629 
2630 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2631 {
2632   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2633 
2634   PetscFunctionBegin;
2635   *infog = mumps->id.INFOG(icntl);
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
2638 
2639 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2640 {
2641   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2642 
2643   PetscFunctionBegin;
2644   *rinfo = mumps->id.RINFO(icntl);
2645   PetscFunctionReturn(PETSC_SUCCESS);
2646 }
2647 
2648 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2649 {
2650   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2651 
2652   PetscFunctionBegin;
2653   *rinfog = mumps->id.RINFOG(icntl);
2654   PetscFunctionReturn(PETSC_SUCCESS);
2655 }
2656 
2657 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2658 {
2659   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2660 
2661   PetscFunctionBegin;
2662   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2663   *size  = 0;
2664   *array = NULL;
2665   if (!mumps->myid) {
2666     *size = mumps->id.INFOG(28);
2667     PetscCall(PetscMalloc1(*size, array));
2668     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2669   }
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2674 {
2675   Mat          Bt = NULL, Btseq = NULL;
2676   PetscBool    flg;
2677   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2678   PetscScalar *aa;
2679   PetscInt     spnr, *ia, *ja, M, nrhs;
2680 
2681   PetscFunctionBegin;
2682   PetscValidPointer(spRHS, 2);
2683   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2684   if (flg) {
2685     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2686   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2687 
2688   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2689 
2690   if (mumps->petsc_size > 1) {
2691     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2692     Btseq         = b->A;
2693   } else {
2694     Btseq = Bt;
2695   }
2696 
2697   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2698   mumps->id.nrhs = nrhs;
2699   mumps->id.lrhs = M;
2700   mumps->id.rhs  = NULL;
2701 
2702   if (!mumps->myid) {
2703     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2704     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2705     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2706     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2707     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2708   } else {
2709     mumps->id.irhs_ptr    = NULL;
2710     mumps->id.irhs_sparse = NULL;
2711     mumps->id.nz_rhs      = 0;
2712     mumps->id.rhs_sparse  = NULL;
2713   }
2714   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2715   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2716 
2717   /* solve phase */
2718   mumps->id.job = JOB_SOLVE;
2719   PetscMUMPS_c(mumps);
2720   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2721 
2722   if (!mumps->myid) {
2723     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2724     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2725     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2726   }
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729 
2730 /*@
2731   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2732 
2733    Logically Collective
2734 
2735    Input Parameter:
2736 .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2737 
2738   Output Parameter:
2739 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2740 
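  Example Usage:
  A sketch of typical usage, in which `Bt` is assumed to be a sequential `MATAIJ` matrix whose nonzero pattern selects the entries of the inverse to be computed; its values are overwritten with those entries
.vb
  Mat spRHS;
  PetscCall(MatCreateTranspose(Bt, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
.ve
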
2741    Level: beginner
2742 
2743    References:
2744 .  * - MUMPS Users' Guide
2745 
2746 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2747 @*/
2748 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2749 {
2750   PetscFunctionBegin;
2751   PetscValidType(F, 1);
2752   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2753   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2754   PetscFunctionReturn(PETSC_SUCCESS);
2755 }
2756 
2757 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2758 {
2759   Mat spRHS;
2760 
2761   PetscFunctionBegin;
2762   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2763   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2764   PetscCall(MatDestroy(&spRHS));
2765   PetscFunctionReturn(PETSC_SUCCESS);
2766 }
2767 
2768 /*@
2769   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
2770 
2771    Logically Collective
2772 
2773    Input Parameter:
2774 .  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2775 
2776   Output Parameter:
2777 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
2778 
2779    Level: beginner
2780 
2781    References:
2782 .  * - MUMPS Users' Guide
2783 
2784 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2785 @*/
2786 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2787 {
2788   PetscBool flg;
2789 
2790   PetscFunctionBegin;
2791   PetscValidType(F, 1);
2792   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2793   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2794   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2795 
2796   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2797   PetscFunctionReturn(PETSC_SUCCESS);
2798 }
2799 
2800 /*@
2801   MatMumpsGetInfo - Get MUMPS parameter INFO()
2802 
2803    Logically Collective
2804 
2805    Input Parameters:
2806 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2807 -  icntl - index of MUMPS parameter array INFO()
2808 
2809   Output Parameter:
2810 .  ival - value of MUMPS INFO(icntl)
2811 
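  Example Usage:
  A minimal sketch (assuming `F` is a MUMPS factor); INFO(15) gives this process's estimate (in MB) of the MUMPS internal data needed for numerical factorization
.vb
  PetscInt mem;
  PetscCall(MatMumpsGetInfo(F, 15, &mem));
.ve
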
2812    Level: beginner
2813 
2814    References:
2815 .  * - MUMPS Users' Guide
2816 
2817 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2818 @*/
2819 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2820 {
2821   PetscFunctionBegin;
2822   PetscValidType(F, 1);
2823   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2824   PetscValidIntPointer(ival, 3);
2825   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2826   PetscFunctionReturn(PETSC_SUCCESS);
2827 }
2828 
2829 /*@
2830   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2831 
2832    Logically Collective
2833 
2834    Input Parameters:
2835 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2836 -  icntl - index of MUMPS parameter array INFOG()
2837 
2838   Output Parameter:
2839 .  ival - value of MUMPS INFOG(icntl)
2840 
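  Example Usage:
  A minimal sketch (assuming `F` has been numerically factored); INFOG(28) reports the number of null pivots encountered
.vb
  PetscInt nnull;
  PetscCall(MatMumpsGetInfog(F, 28, &nnull));
.ve
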
2841    Level: beginner
2842 
2843    References:
2844 .  * - MUMPS Users' Guide
2845 
2846 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2847 @*/
2848 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2849 {
2850   PetscFunctionBegin;
2851   PetscValidType(F, 1);
2852   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2853   PetscValidIntPointer(ival, 3);
2854   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2855   PetscFunctionReturn(PETSC_SUCCESS);
2856 }
2857 
2858 /*@
2859   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2860 
2861    Logically Collective
2862 
2863    Input Parameters:
2864 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2865 -  icntl - index of MUMPS parameter array RINFO()
2866 
2867   Output Parameter:
2868 .  val - value of MUMPS RINFO(icntl)
2869 
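  Example Usage:
  A minimal sketch (assuming `F` is a MUMPS factor); RINFO(3) gives the local estimated flops for the elimination after factorization
.vb
  PetscReal flops;
  PetscCall(MatMumpsGetRinfo(F, 3, &flops));
.ve
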
2870    Level: beginner
2871 
2872    References:
2873 .  * - MUMPS Users' Guide
2874 
2875 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2876 @*/
2877 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2878 {
2879   PetscFunctionBegin;
2880   PetscValidType(F, 1);
2881   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2882   PetscValidRealPointer(val, 3);
2883   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2884   PetscFunctionReturn(PETSC_SUCCESS);
2885 }
2886 
2887 /*@
2888   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2889 
2890    Logically Collective
2891 
2892    Input Parameters:
2893 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2894 -  icntl - index of MUMPS parameter array RINFOG()
2895 
2896   Output Parameter:
2897 .  val - value of MUMPS RINFOG(icntl)
2898 
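  Example Usage:
  A sketch of retrieving the determinant requested with -mat_mumps_icntl_33 1; MUMPS reports it as (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34)
.vb
  PetscReal re, im;
  PetscInt  e;
  PetscCall(MatMumpsGetRinfog(F, 12, &re));
  PetscCall(MatMumpsGetRinfog(F, 13, &im));
  PetscCall(MatMumpsGetInfog(F, 34, &e));
.ve
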
2899    Level: beginner
2900 
2901    References:
2902 .  * - MUMPS Users' Guide
2903 
2904 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2905 @*/
2906 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2907 {
2908   PetscFunctionBegin;
2909   PetscValidType(F, 1);
2910   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2911   PetscValidRealPointer(val, 3);
2912   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2913   PetscFunctionReturn(PETSC_SUCCESS);
2914 }
2915 
2916 /*@
2917   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
2918 
2919    Logically Collective
2920 
2921    Input Parameter:
2922 .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2923 
2924   Output Parameters:
2925 +  size - local size of the array. The size of the array is non-zero only on the host.
2926 -  array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
2927            for freeing this array.
2928 
2929    Level: beginner
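  Example Usage:
  A minimal sketch (requires the factorization to have been run with -mat_mumps_icntl_24 1); the caller frees the returned array
.vb
  PetscInt  nnull, *rows;
  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));
  /* ... use the 0-based row indices ... */
  PetscCall(PetscFree(rows));
.ve
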
2930 
2931    References:
2932 .  * - MUMPS Users' Guide
2933 
2934 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2935 @*/
2936 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
2937 {
2938   PetscFunctionBegin;
2939   PetscValidType(F, 1);
2940   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2941   PetscValidIntPointer(size, 3);
2942   PetscValidPointer(array, 4);
2943   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
2944   PetscFunctionReturn(PETSC_SUCCESS);
2945 }
2946 
2947 /*MC
2948   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2949   distributed and sequential matrices via the external package MUMPS.
2950 
2951   Works with `MATAIJ` and `MATSBAIJ` matrices
2952 
2953   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2954 
2955   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
2956   See details below.
2957 
2958   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
2959 
2960   Options Database Keys:
2961 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2962 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2963 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2964 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2965 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
2966 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2967                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2968 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2969 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinements
2970 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2971 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2972 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2973 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2974 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2975 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2976 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2977 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2978 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
2979 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2980 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2981 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2982 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2983 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2984 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2985 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2986 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2987 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2988 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2989 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2990 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2991 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2992 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2993 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2994 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2995 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2996 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2997                                    The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2998 
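  For example, a typical command line that selects MUMPS and adjusts a few of the options above might look like the following (the executable name is illustrative only)
.vb
   mpiexec -n 4 ./myapp -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30 -mat_mumps_icntl_24 1
.ve
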
2999   Level: beginner
3000 
3001     Notes:
3002     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3003     error if the matrix is Hermitian.
3004 
3005     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3006     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3007 
3008     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3009     the failure with
3010 .vb
3011           KSPGetPC(ksp,&pc);
3012           PCFactorGetMatrix(pc,&mat);
3013           MatMumpsGetInfo(mat,....);
3014           MatMumpsGetInfog(mat,....); etc.
3015 .ve
3016     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3017 
3018     MUMPS provides 64-bit integer support in two build modes:
3019       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3020       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3021 
3022       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3023       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3024       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3025       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3026 
3027     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3028 
3029   Two modes to run MUMPS/PETSc with OpenMP
3030 .vb
3031      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3032      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3033 .ve
3034 
3035 .vb
3036      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3037     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3038 .ve
3039 
3040    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3041    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3042    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3043    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3044    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3045 
3046    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3047    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3048    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3049    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We will free CPUs assigned to slaves (might be set
3050    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3051    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
3052    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
3053    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
3054    cores in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
3055    problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
3056    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3057    examine the mapping result.
3058 
3059    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3060    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3061    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3062 
3063    References:
3064 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3065 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3066 
3067 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3068 M*/
3069 
3070 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3071 {
3072   PetscFunctionBegin;
3073   *type = MATSOLVERMUMPS;
3074   PetscFunctionReturn(PETSC_SUCCESS);
3075 }
3076 
3077 /* MatGetFactor for Seq and MPI AIJ matrices */
3078 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3079 {
3080   Mat         B;
3081   Mat_MUMPS  *mumps;
3082   PetscBool   isSeqAIJ;
3083   PetscMPIInt size;
3084 
3085   PetscFunctionBegin;
3086 #if defined(PETSC_USE_COMPLEX)
3087   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3088 #endif
3089   /* Create the factorization matrix */
3090   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3091   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3092   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3093   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3094   PetscCall(MatSetUp(B));
3095 
3096   PetscCall(PetscNew(&mumps));
3097 
3098   B->ops->view    = MatView_MUMPS;
3099   B->ops->getinfo = MatGetInfo_MUMPS;
3100 
3101   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3102   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3103   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3104   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3105   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3106   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3107   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3108   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3109   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3110   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3111   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3112   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3113   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3114   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3115 
3116   if (ftype == MAT_FACTOR_LU) {
3117     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3118     B->factortype            = MAT_FACTOR_LU;
3119     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3120     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3121     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3122     mumps->sym = 0;
3123   } else {
3124     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3125     B->factortype                  = MAT_FACTOR_CHOLESKY;
3126     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3127     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3128     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3129 #if defined(PETSC_USE_COMPLEX)
3130     mumps->sym = 2;
3131 #else
3132     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3133     else mumps->sym = 2;
3134 #endif
3135   }
3136 
3137   /* set solvertype */
3138   PetscCall(PetscFree(B->solvertype));
3139   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3140   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3141   if (size == 1) {
3142     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3143     B->canuseordering = PETSC_TRUE;
3144   }
3145   B->ops->destroy = MatDestroy_MUMPS;
3146   B->data         = (void *)mumps;
3147 
3148   *F               = B;
3149   mumps->id.job    = JOB_NULL;
3150   mumps->ICNTL_pre = NULL;
3151   mumps->CNTL_pre  = NULL;
3152   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3153   PetscFunctionReturn(PETSC_SUCCESS);
3154 }
3155 
3156 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3157 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3158 {
3159   Mat         B;
3160   Mat_MUMPS  *mumps;
3161   PetscBool   isSeqSBAIJ;
3162   PetscMPIInt size;
3163 
3164   PetscFunctionBegin;
3165 #if defined(PETSC_USE_COMPLEX)
3166   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3167 #endif
3168   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3169   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3170   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3171   PetscCall(MatSetUp(B));
3172 
3173   PetscCall(PetscNew(&mumps));
3174   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3175   if (isSeqSBAIJ) {
3176     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3177   } else {
3178     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3179   }
3180 
3181   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3182   B->ops->view                   = MatView_MUMPS;
3183   B->ops->getinfo                = MatGetInfo_MUMPS;
3184 
3185   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3186   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3199 
3200   B->factortype = MAT_FACTOR_CHOLESKY;
3201 #if defined(PETSC_USE_COMPLEX)
3202   mumps->sym = 2;
3203 #else
3204   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3205   else mumps->sym = 2;
3206 #endif
3207 
3208   /* set solvertype */
3209   PetscCall(PetscFree(B->solvertype));
3210   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3211   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3212   if (size == 1) {
3213     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3214     B->canuseordering = PETSC_TRUE;
3215   }
3216   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3217   B->ops->destroy = MatDestroy_MUMPS;
3218   B->data         = (void *)mumps;
3219 
3220   *F               = B;
3221   mumps->id.job    = JOB_NULL;
3222   mumps->ICNTL_pre = NULL;
3223   mumps->CNTL_pre  = NULL;
3224   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3225   PetscFunctionReturn(PETSC_SUCCESS);
3226 }
3227 
3228 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3229 {
3230   Mat         B;
3231   Mat_MUMPS  *mumps;
3232   PetscBool   isSeqBAIJ;
3233   PetscMPIInt size;
3234 
3235   PetscFunctionBegin;
3236   /* Create the factorization matrix */
3237   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3238   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3239   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3240   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3241   PetscCall(MatSetUp(B));
3242 
3243   PetscCall(PetscNew(&mumps));
3244   if (ftype == MAT_FACTOR_LU) {
3245     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3246     B->factortype            = MAT_FACTOR_LU;
3247     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3248     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3249     mumps->sym = 0;
3250     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3251   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3252 
3253   B->ops->view    = MatView_MUMPS;
3254   B->ops->getinfo = MatGetInfo_MUMPS;
3255 
3256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3257   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3258   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3261   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3262   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3263   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3264   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3265   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3266   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3267   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3268   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3269   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3270 
3271   /* set solvertype */
3272   PetscCall(PetscFree(B->solvertype));
3273   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3274   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3275   if (size == 1) {
3276     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3277     B->canuseordering = PETSC_TRUE;
3278   }
3279   B->ops->destroy = MatDestroy_MUMPS;
3280   B->data         = (void *)mumps;
3281 
3282   *F               = B;
3283   mumps->id.job    = JOB_NULL;
3284   mumps->ICNTL_pre = NULL;
3285   mumps->CNTL_pre  = NULL;
3286   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3287   PetscFunctionReturn(PETSC_SUCCESS);
3288 }
3289 
3290 /* MatGetFactor for Seq and MPI SELL matrices */
3291 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3292 {
3293   Mat         B;
3294   Mat_MUMPS  *mumps;
3295   PetscBool   isSeqSELL;
3296   PetscMPIInt size;
3297 
3298   PetscFunctionBegin;
3299   /* Create the factorization matrix */
3300   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3301   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3302   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3303   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3304   PetscCall(MatSetUp(B));
3305 
3306   PetscCall(PetscNew(&mumps));
3307 
3308   B->ops->view    = MatView_MUMPS;
3309   B->ops->getinfo = MatGetInfo_MUMPS;
3310 
3311   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3312   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3313   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3314   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3315   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3316   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3317   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3318   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3319   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3320   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3321   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3322   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3323 
3324   if (ftype == MAT_FACTOR_LU) {
3325     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3326     B->factortype            = MAT_FACTOR_LU;
3327     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3328     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3329     mumps->sym = 0;
3330     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3331   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3332 
3333   /* set solvertype */
3334   PetscCall(PetscFree(B->solvertype));
3335   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3336   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3337   if (size == 1) {
3338     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3339     B->canuseordering = PETSC_TRUE;
3340   }
3341   B->ops->destroy = MatDestroy_MUMPS;
3342   B->data         = (void *)mumps;
3343 
3344   *F               = B;
3345   mumps->id.job    = JOB_NULL;
3346   mumps->ICNTL_pre = NULL;
3347   mumps->CNTL_pre  = NULL;
3348   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3349   PetscFunctionReturn(PETSC_SUCCESS);
3350 }
3351 
3352 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3353 {
3354   PetscFunctionBegin;
3355   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3356   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3357   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3358   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3359   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3360   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3361   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3362   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3363   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3364   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3365   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3366   PetscFunctionReturn(PETSC_SUCCESS);
3367 }
3368