xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision e44f6aeb0c7eee2716d50e70cd976c48a5d3c225)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14   #if defined(PETSC_USE_REAL_SINGLE)
15     #include <cmumps_c.h>
16   #else
17     #include <zmumps_c.h>
18   #endif
19 #else
20   #if defined(PETSC_USE_REAL_SINGLE)
21     #include <smumps_c.h>
22   #else
23     #include <dmumps_c.h>
24   #endif
25 #endif
26 EXTERN_C_END
27 #define JOB_INIT         -1
28 #define JOB_NULL         0
29 #define JOB_FACTSYMBOLIC 1
30 #define JOB_FACTNUMERIC  2
31 #define JOB_SOLVE        3
32 #define JOB_END          -2
33 
34 /* calls to MUMPS */
35 #if defined(PETSC_USE_COMPLEX)
36   #if defined(PETSC_USE_REAL_SINGLE)
37     #define MUMPS_c cmumps_c
38   #else
39     #define MUMPS_c zmumps_c
40   #endif
41 #else
42   #if defined(PETSC_USE_REAL_SINGLE)
43     #define MUMPS_c smumps_c
44   #else
45     #define MUMPS_c dmumps_c
46   #endif
47 #endif
48 
49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
50    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
51    naming convention in PetscMPIInt, PetscBLASInt etc.
52 */
53 typedef MUMPS_INT PetscMUMPSInt;
54 
55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
56   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
57     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
58   #endif
59 #else
60   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
61     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
62   #endif
63 #endif
64 
65 #define MPIU_MUMPSINT       MPI_INT
66 #define PETSC_MUMPS_INT_MAX 2147483647
67 #define PETSC_MUMPS_INT_MIN -2147483648
68 
69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
71 {
72   PetscFunctionBegin;
73 #if PetscDefined(USE_64BIT_INDICES)
74   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
75 #endif
76   *b = (PetscMUMPSInt)(a);
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
80 /* Put these utility routines here since they are only used in this file */
81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
82 {
83   PetscInt  myval;
84   PetscBool myset;
85   PetscFunctionBegin;
86   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
87   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
88   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
89   if (set) *set = myset;
90   PetscFunctionReturn(PETSC_SUCCESS);
91 }
92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
93 
94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
95 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
96   #define PetscMUMPS_c(mumps) \
97     do { \
98       if (mumps->use_petsc_omp_support) { \
99         if (mumps->is_omp_master) { \
100           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
101           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
102           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
103           PetscCall(PetscFPTrapPop()); \
104           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
105         } \
106         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
107         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
108          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
109          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
110          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
111       */ \
112         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
113         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
116       } else { \
117         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
118         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
119         PetscCall(PetscFPTrapPop()); \
120       } \
121     } while (0)
122 #else
123   #define PetscMUMPS_c(mumps) \
124     do { \
125       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
126       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
127       PetscCall(PetscFPTrapPop()); \
128     } while (0)
129 #endif
130 
131 /* declare MumpsScalar */
132 #if defined(PETSC_USE_COMPLEX)
133   #if defined(PETSC_USE_REAL_SINGLE)
134     #define MumpsScalar mumps_complex
135   #else
136     #define MumpsScalar mumps_double_complex
137   #endif
138 #else
139   #define MumpsScalar PetscScalar
140 #endif
141 
142 /* macros s.t. indices match MUMPS documentation */
143 #define ICNTL(I)  icntl[(I)-1]
144 #define CNTL(I)   cntl[(I)-1]
145 #define INFOG(I)  infog[(I)-1]
146 #define INFO(I)   info[(I)-1]
147 #define RINFOG(I) rinfog[(I)-1]
148 #define RINFO(I)  rinfo[(I)-1]
149 
150 typedef struct Mat_MUMPS Mat_MUMPS;
151 struct Mat_MUMPS {
152 #if defined(PETSC_USE_COMPLEX)
153   #if defined(PETSC_USE_REAL_SINGLE)
154   CMUMPS_STRUC_C id;
155   #else
156   ZMUMPS_STRUC_C id;
157   #endif
158 #else
159   #if defined(PETSC_USE_REAL_SINGLE)
160   SMUMPS_STRUC_C id;
161   #else
162   DMUMPS_STRUC_C id;
163   #endif
164 #endif
165 
166   MatStructure   matstruc;
167   PetscMPIInt    myid, petsc_size;
168   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
169   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
170   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
171   PetscMUMPSInt  sym;
172   MPI_Comm       mumps_comm;
173   PetscMUMPSInt *ICNTL_pre;
174   PetscReal     *CNTL_pre;
175   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
176   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
177   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
178   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
179 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
180   PetscInt    *rhs_nrow, max_nrhs;
181   PetscMPIInt *rhs_recvcounts, *rhs_disps;
182   PetscScalar *rhs_loc, *rhs_recvbuf;
183 #endif
184   Vec            b_seq, x_seq;
185   PetscInt       ninfo, *info; /* which INFO to display */
186   PetscInt       sizeredrhs;
187   PetscScalar   *schur_sol;
188   PetscInt       schur_sizesol;
189   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
190   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
191   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
192 
193   /* Support for MATNEST */
194   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
195   PetscInt64  *nest_vals_start;
196   PetscScalar *nest_vals;
197 
198   /* stuff used by petsc/mumps OpenMP support*/
199   PetscBool    use_petsc_omp_support;
200   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
201   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
202   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
203   PetscMPIInt  tag, omp_comm_size;
204   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
205   MPI_Request *reqs;
206 };
207 
208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
209    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
210  */
211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
212 {
213   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
214 
215   PetscFunctionBegin;
216 #if defined(PETSC_USE_64BIT_INDICES)
217   {
218     PetscInt i;
219     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
220       PetscCall(PetscFree(mumps->ia_alloc));
221       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
222       mumps->cur_ilen = nrow + 1;
223     }
224     if (nnz > mumps->cur_jlen) {
225       PetscCall(PetscFree(mumps->ja_alloc));
226       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
227       mumps->cur_jlen = nnz;
228     }
229     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
230     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
231     *ia_mumps = mumps->ia_alloc;
232     *ja_mumps = mumps->ja_alloc;
233   }
234 #else
235   *ia_mumps = ia;
236   *ja_mumps = ja;
237 #endif
238   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
243 {
244   PetscFunctionBegin;
245   PetscCall(PetscFree(mumps->id.listvar_schur));
246   PetscCall(PetscFree(mumps->id.redrhs));
247   PetscCall(PetscFree(mumps->schur_sol));
248   mumps->id.size_schur = 0;
249   mumps->id.schur_lld  = 0;
250   mumps->id.ICNTL(19)  = 0;
251   PetscFunctionReturn(PETSC_SUCCESS);
252 }
253 
254 /* solve with rhs in mumps->id.redrhs and return in the same location */
255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
256 {
257   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
258   Mat                  S, B, X;
259   MatFactorSchurStatus schurstatus;
260   PetscInt             sizesol;
261 
262   PetscFunctionBegin;
263   PetscCall(MatFactorFactorizeSchurComplement(F));
264   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
265   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
266   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
268   PetscCall(MatBindToCPU(B, S->boundtocpu));
269 #endif
270   switch (schurstatus) {
271   case MAT_FACTOR_SCHUR_FACTORED:
272     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
273     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
275     PetscCall(MatBindToCPU(X, S->boundtocpu));
276 #endif
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatMatSolveTranspose(S, B, X));
279     } else {
280       PetscCall(MatMatSolve(S, B, X));
281     }
282     break;
283   case MAT_FACTOR_SCHUR_INVERTED:
284     sizesol = mumps->id.nrhs * mumps->id.size_schur;
285     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
286       PetscCall(PetscFree(mumps->schur_sol));
287       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
288       mumps->schur_sizesol = sizesol;
289     }
290     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
291     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
293     PetscCall(MatBindToCPU(X, S->boundtocpu));
294 #endif
295     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
296     if (!mumps->id.ICNTL(9)) { /* transpose solve */
297       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
298     } else {
299       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
300     }
301     PetscCall(MatProductSetFromOptions(X));
302     PetscCall(MatProductSymbolic(X));
303     PetscCall(MatProductNumeric(X));
304 
305     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
306     break;
307   default:
308     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
309   }
310   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
311   PetscCall(MatDestroy(&B));
312   PetscCall(MatDestroy(&X));
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
317 {
318   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
319 
320   PetscFunctionBegin;
321   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
322     PetscFunctionReturn(PETSC_SUCCESS);
323   }
324   if (!expansion) { /* prepare for the condensation step */
325     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
326     /* allocate MUMPS internal array to store reduced right-hand sides */
327     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
328       PetscCall(PetscFree(mumps->id.redrhs));
329       mumps->id.lredrhs = mumps->id.size_schur;
330       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
331       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
332     }
333   } else { /* prepare for the expansion step */
334     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
335     PetscCall(MatMumpsSolveSchur_Private(F));
336     mumps->id.ICNTL(26) = 2; /* expansion phase */
337     PetscMUMPS_c(mumps);
338     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
339     /* restore defaults */
340     mumps->id.ICNTL(26) = -1;
341     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
342     if (mumps->id.nrhs > 1) {
343       PetscCall(PetscFree(mumps->id.redrhs));
344       mumps->id.lredrhs = 0;
345       mumps->sizeredrhs = 0;
346     }
347   }
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 /*
352   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
353 
354   input:
355     A       - matrix in aij,baij or sbaij format
356     shift   - 0: C style output triple; 1: Fortran style output triple.
357     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
358               MAT_REUSE_MATRIX:   only the values in v array are updated
359   output:
360     nnz     - dim of r, c, and v (number of local nonzero entries of A)
361     r, c, v - row and col index, matrix values (matrix triples)
362 
363   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
364   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
365   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
366 
367  */
368 
369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
370 {
371   const PetscScalar *av;
372   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
373   PetscInt64         nz, rnz, i, j, k;
374   PetscMUMPSInt     *row, *col;
375   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
376 
377   PetscFunctionBegin;
378   PetscCall(MatSeqAIJGetArrayRead(A, &av));
379   if (reuse == MAT_INITIAL_MATRIX) {
380     nz = aa->nz;
381     ai = aa->i;
382     aj = aa->j;
383     PetscCall(PetscMalloc2(nz, &row, nz, &col));
384     for (i = k = 0; i < M; i++) {
385       rnz = ai[i + 1] - ai[i];
386       ajj = aj + ai[i];
387       for (j = 0; j < rnz; j++) {
388         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
389         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
390         k++;
391       }
392     }
393     mumps->val = (PetscScalar *)av;
394     mumps->irn = row;
395     mumps->jcn = col;
396     mumps->nnz = nz;
397   } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz));
398   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
402 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
403 {
404   PetscInt64     nz, i, j, k, r;
405   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
406   PetscMUMPSInt *row, *col;
407 
408   PetscFunctionBegin;
409   nz = a->sliidx[a->totalslices];
410   if (reuse == MAT_INITIAL_MATRIX) {
411     PetscCall(PetscMalloc2(nz, &row, nz, &col));
412     for (i = k = 0; i < a->totalslices; i++) {
413       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
414     }
415     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
416     mumps->irn = row;
417     mumps->jcn = col;
418     mumps->nnz = nz;
419     mumps->val = a->val;
420   } else PetscCall(PetscArraycpy(mumps->val, a->val, nz));
421   PetscFunctionReturn(PETSC_SUCCESS);
422 }
423 
424 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
425 {
426   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
427   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
428   PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
429   PetscInt        bs;
430   PetscMUMPSInt  *row, *col;
431 
432   PetscFunctionBegin;
433   if (reuse == MAT_INITIAL_MATRIX) {
434     PetscCall(MatGetBlockSize(A, &bs));
435     M  = A->rmap->N / bs;
436     ai = aa->i;
437     aj = aa->j;
438     PetscCall(PetscMalloc2(nz, &row, nz, &col));
439     for (i = 0; i < M; i++) {
440       ajj = aj + ai[i];
441       rnz = ai[i + 1] - ai[i];
442       for (k = 0; k < rnz; k++) {
443         for (j = 0; j < bs; j++) {
444           for (m = 0; m < bs; m++) {
445             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
446             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
447             idx++;
448           }
449         }
450       }
451     }
452     mumps->irn = row;
453     mumps->jcn = col;
454     mumps->nnz = nz;
455     mumps->val = aa->a;
456   } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz));
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
460 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
461 {
462   const PetscInt *ai, *aj, *ajj;
463   PetscInt        bs;
464   PetscInt64      nz, rnz, i, j, k, m;
465   PetscMUMPSInt  *row, *col;
466   PetscScalar    *val;
467   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
468   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
469 #if defined(PETSC_USE_COMPLEX)
470   PetscBool isset, hermitian;
471 #endif
472 
473   PetscFunctionBegin;
474 #if defined(PETSC_USE_COMPLEX)
475   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
476   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
477 #endif
478   ai = aa->i;
479   aj = aa->j;
480   PetscCall(MatGetBlockSize(A, &bs));
481   if (reuse == MAT_INITIAL_MATRIX) {
482     const PetscInt64 alloc_size = aa->nz * bs2;
483 
484     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
485     if (bs > 1) {
486       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
487       mumps->val = mumps->val_alloc;
488     } else {
489       mumps->val = aa->a;
490     }
491     mumps->irn = row;
492     mumps->jcn = col;
493   } else {
494     row = mumps->irn;
495     col = mumps->jcn;
496   }
497   val = mumps->val;
498 
499   nz = 0;
500   if (bs > 1) {
501     for (i = 0; i < mbs; i++) {
502       rnz = ai[i + 1] - ai[i];
503       ajj = aj + ai[i];
504       for (j = 0; j < rnz; j++) {
505         for (k = 0; k < bs; k++) {
506           for (m = 0; m < bs; m++) {
507             if (ajj[j] > i || k >= m) {
508               if (reuse == MAT_INITIAL_MATRIX) {
509                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
510                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
511               }
512               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
513             }
514           }
515         }
516       }
517     }
518   } else if (reuse == MAT_INITIAL_MATRIX) {
519     for (i = 0; i < mbs; i++) {
520       rnz = ai[i + 1] - ai[i];
521       ajj = aj + ai[i];
522       for (j = 0; j < rnz; j++) {
523         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
524         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
525         nz++;
526       }
527     }
528     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
529   } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */
530   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
531   PetscFunctionReturn(PETSC_SUCCESS);
532 }
533 
534 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
535 {
536   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
537   PetscInt64         nz, rnz, i, j;
538   const PetscScalar *av, *v1;
539   PetscScalar       *val;
540   PetscMUMPSInt     *row, *col;
541   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
542   PetscBool          missing;
543 #if defined(PETSC_USE_COMPLEX)
544   PetscBool hermitian, isset;
545 #endif
546 
547   PetscFunctionBegin;
548 #if defined(PETSC_USE_COMPLEX)
549   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
550   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
551 #endif
552   PetscCall(MatSeqAIJGetArrayRead(A, &av));
553   ai    = aa->i;
554   aj    = aa->j;
555   adiag = aa->diag;
556   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
557   if (reuse == MAT_INITIAL_MATRIX) {
558     /* count nz in the upper triangular part of A */
559     nz = 0;
560     if (missing) {
561       for (i = 0; i < M; i++) {
562         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
563           for (j = ai[i]; j < ai[i + 1]; j++) {
564             if (aj[j] < i) continue;
565             nz++;
566           }
567         } else {
568           nz += ai[i + 1] - adiag[i];
569         }
570       }
571     } else {
572       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
573     }
574     PetscCall(PetscMalloc2(nz, &row, nz, &col));
575     PetscCall(PetscMalloc1(nz, &val));
576     mumps->nnz = nz;
577     mumps->irn = row;
578     mumps->jcn = col;
579     mumps->val = mumps->val_alloc = val;
580 
581     nz = 0;
582     if (missing) {
583       for (i = 0; i < M; i++) {
584         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
585           for (j = ai[i]; j < ai[i + 1]; j++) {
586             if (aj[j] < i) continue;
587             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
588             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
589             val[nz] = av[j];
590             nz++;
591           }
592         } else {
593           rnz = ai[i + 1] - adiag[i];
594           ajj = aj + adiag[i];
595           v1  = av + adiag[i];
596           for (j = 0; j < rnz; j++) {
597             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
598             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
599             val[nz++] = v1[j];
600           }
601         }
602       }
603     } else {
604       for (i = 0; i < M; i++) {
605         rnz = ai[i + 1] - adiag[i];
606         ajj = aj + adiag[i];
607         v1  = av + adiag[i];
608         for (j = 0; j < rnz; j++) {
609           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
610           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
611           val[nz++] = v1[j];
612         }
613       }
614     }
615   } else {
616     nz  = 0;
617     val = mumps->val;
618     if (missing) {
619       for (i = 0; i < M; i++) {
620         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
621           for (j = ai[i]; j < ai[i + 1]; j++) {
622             if (aj[j] < i) continue;
623             val[nz++] = av[j];
624           }
625         } else {
626           rnz = ai[i + 1] - adiag[i];
627           v1  = av + adiag[i];
628           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
629         }
630       }
631     } else {
632       for (i = 0; i < M; i++) {
633         rnz = ai[i + 1] - adiag[i];
634         v1  = av + adiag[i];
635         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
636       }
637     }
638   }
639   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
640   PetscFunctionReturn(PETSC_SUCCESS);
641 }
642 
643 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
644 {
645   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
646   PetscInt           bs;
647   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
648   PetscMUMPSInt     *row, *col;
649   const PetscScalar *av, *bv, *v1, *v2;
650   PetscScalar       *val;
651   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
652   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
653   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
654   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
655 #if defined(PETSC_USE_COMPLEX)
656   PetscBool hermitian, isset;
657 #endif
658 
659   PetscFunctionBegin;
660 #if defined(PETSC_USE_COMPLEX)
661   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
662   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
663 #endif
664   PetscCall(MatGetBlockSize(A, &bs));
665   rstart = A->rmap->rstart;
666   ai     = aa->i;
667   aj     = aa->j;
668   bi     = bb->i;
669   bj     = bb->j;
670   av     = aa->a;
671   bv     = bb->a;
672 
673   garray = mat->garray;
674 
675   if (reuse == MAT_INITIAL_MATRIX) {
676     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
677     PetscCall(PetscMalloc2(nz, &row, nz, &col));
678     PetscCall(PetscMalloc1(nz, &val));
679     /* can not decide the exact mumps->nnz now because of the SBAIJ */
680     mumps->irn = row;
681     mumps->jcn = col;
682     mumps->val = mumps->val_alloc = val;
683   } else {
684     val = mumps->val;
685   }
686 
687   jj   = 0;
688   irow = rstart;
689   for (i = 0; i < mbs; i++) {
690     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
691     countA = ai[i + 1] - ai[i];
692     countB = bi[i + 1] - bi[i];
693     bjj    = bj + bi[i];
694     v1     = av + ai[i] * bs2;
695     v2     = bv + bi[i] * bs2;
696 
697     if (bs > 1) {
698       /* A-part */
699       for (j = 0; j < countA; j++) {
700         for (k = 0; k < bs; k++) {
701           for (m = 0; m < bs; m++) {
702             if (rstart + ajj[j] * bs > irow || k >= m) {
703               if (reuse == MAT_INITIAL_MATRIX) {
704                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
705                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
706               }
707               val[jj++] = v1[j * bs2 + m + k * bs];
708             }
709           }
710         }
711       }
712 
713       /* B-part */
714       for (j = 0; j < countB; j++) {
715         for (k = 0; k < bs; k++) {
716           for (m = 0; m < bs; m++) {
717             if (reuse == MAT_INITIAL_MATRIX) {
718               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
719               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
720             }
721             val[jj++] = v2[j * bs2 + m + k * bs];
722           }
723         }
724       }
725     } else {
726       /* A-part */
727       for (j = 0; j < countA; j++) {
728         if (reuse == MAT_INITIAL_MATRIX) {
729           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
730           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
731         }
732         val[jj++] = v1[j];
733       }
734 
735       /* B-part */
736       for (j = 0; j < countB; j++) {
737         if (reuse == MAT_INITIAL_MATRIX) {
738           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
739           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
740         }
741         val[jj++] = v2[j];
742       }
743     }
744     irow += bs;
745   }
746   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
747   PetscFunctionReturn(PETSC_SUCCESS);
748 }
749 
750 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
751 {
752   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
753   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
754   PetscMUMPSInt     *row, *col;
755   const PetscScalar *av, *bv, *v1, *v2;
756   PetscScalar       *val;
757   Mat                Ad, Ao;
758   Mat_SeqAIJ        *aa;
759   Mat_SeqAIJ        *bb;
760 
761   PetscFunctionBegin;
762   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
763   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
764   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
765 
766   aa = (Mat_SeqAIJ *)(Ad)->data;
767   bb = (Mat_SeqAIJ *)(Ao)->data;
768   ai = aa->i;
769   aj = aa->j;
770   bi = bb->i;
771   bj = bb->j;
772 
773   rstart = A->rmap->rstart;
774   cstart = A->cmap->rstart;
775 
776   if (reuse == MAT_INITIAL_MATRIX) {
777     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
778     PetscCall(PetscMalloc2(nz, &row, nz, &col));
779     PetscCall(PetscMalloc1(nz, &val));
780     mumps->nnz = nz;
781     mumps->irn = row;
782     mumps->jcn = col;
783     mumps->val = mumps->val_alloc = val;
784   } else {
785     val = mumps->val;
786   }
787 
788   jj   = 0;
789   irow = rstart;
790   for (i = 0; i < m; i++) {
791     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
792     countA = ai[i + 1] - ai[i];
793     countB = bi[i + 1] - bi[i];
794     bjj    = bj + bi[i];
795     v1     = av + ai[i];
796     v2     = bv + bi[i];
797 
798     /* A-part */
799     for (j = 0; j < countA; j++) {
800       if (reuse == MAT_INITIAL_MATRIX) {
801         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
802         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
803       }
804       val[jj++] = v1[j];
805     }
806 
807     /* B-part */
808     for (j = 0; j < countB; j++) {
809       if (reuse == MAT_INITIAL_MATRIX) {
810         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
811         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
812       }
813       val[jj++] = v2[j];
814     }
815     irow++;
816   }
817   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
818   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
823 {
824   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
825   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
826   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
827   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
828   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
829   const PetscInt     bs2 = mat->bs2;
830   PetscInt           bs;
831   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
832   PetscMUMPSInt     *row, *col;
833   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
834   PetscScalar       *val;
835 
836   PetscFunctionBegin;
837   PetscCall(MatGetBlockSize(A, &bs));
838   if (reuse == MAT_INITIAL_MATRIX) {
839     nz = bs2 * (aa->nz + bb->nz);
840     PetscCall(PetscMalloc2(nz, &row, nz, &col));
841     PetscCall(PetscMalloc1(nz, &val));
842     mumps->nnz = nz;
843     mumps->irn = row;
844     mumps->jcn = col;
845     mumps->val = mumps->val_alloc = val;
846   } else {
847     val = mumps->val;
848   }
849 
850   jj   = 0;
851   irow = rstart;
852   for (i = 0; i < mbs; i++) {
853     countA = ai[i + 1] - ai[i];
854     countB = bi[i + 1] - bi[i];
855     ajj    = aj + ai[i];
856     bjj    = bj + bi[i];
857     v1     = av + bs2 * ai[i];
858     v2     = bv + bs2 * bi[i];
859 
860     idx = 0;
861     /* A-part */
862     for (k = 0; k < countA; k++) {
863       for (j = 0; j < bs; j++) {
864         for (n = 0; n < bs; n++) {
865           if (reuse == MAT_INITIAL_MATRIX) {
866             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
867             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
868           }
869           val[jj++] = v1[idx++];
870         }
871       }
872     }
873 
874     idx = 0;
875     /* B-part */
876     for (k = 0; k < countB; k++) {
877       for (j = 0; j < bs; j++) {
878         for (n = 0; n < bs; n++) {
879           if (reuse == MAT_INITIAL_MATRIX) {
880             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
881             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
882           }
883           val[jj++] = v2[idx++];
884         }
885       }
886     }
887     irow += bs;
888   }
889   PetscFunctionReturn(PETSC_SUCCESS);
890 }
891 
892 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
893 {
894   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
895   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
896   PetscMUMPSInt     *row, *col;
897   const PetscScalar *av, *bv, *v1, *v2;
898   PetscScalar       *val;
899   Mat                Ad, Ao;
900   Mat_SeqAIJ        *aa;
901   Mat_SeqAIJ        *bb;
902 #if defined(PETSC_USE_COMPLEX)
903   PetscBool hermitian, isset;
904 #endif
905 
906   PetscFunctionBegin;
907 #if defined(PETSC_USE_COMPLEX)
908   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
909   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
910 #endif
911   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
912   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
913   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
914 
915   aa    = (Mat_SeqAIJ *)(Ad)->data;
916   bb    = (Mat_SeqAIJ *)(Ao)->data;
917   ai    = aa->i;
918   aj    = aa->j;
919   adiag = aa->diag;
920   bi    = bb->i;
921   bj    = bb->j;
922 
923   rstart = A->rmap->rstart;
924 
925   if (reuse == MAT_INITIAL_MATRIX) {
926     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
927     nzb = 0; /* num of upper triangular entries in mat->B */
928     for (i = 0; i < m; i++) {
929       nza += (ai[i + 1] - adiag[i]);
930       countB = bi[i + 1] - bi[i];
931       bjj    = bj + bi[i];
932       for (j = 0; j < countB; j++) {
933         if (garray[bjj[j]] > rstart) nzb++;
934       }
935     }
936 
937     nz = nza + nzb; /* total nz of upper triangular part of mat */
938     PetscCall(PetscMalloc2(nz, &row, nz, &col));
939     PetscCall(PetscMalloc1(nz, &val));
940     mumps->nnz = nz;
941     mumps->irn = row;
942     mumps->jcn = col;
943     mumps->val = mumps->val_alloc = val;
944   } else {
945     val = mumps->val;
946   }
947 
948   jj   = 0;
949   irow = rstart;
950   for (i = 0; i < m; i++) {
951     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
952     v1     = av + adiag[i];
953     countA = ai[i + 1] - adiag[i];
954     countB = bi[i + 1] - bi[i];
955     bjj    = bj + bi[i];
956     v2     = bv + bi[i];
957 
958     /* A-part */
959     for (j = 0; j < countA; j++) {
960       if (reuse == MAT_INITIAL_MATRIX) {
961         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
962         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
963       }
964       val[jj++] = v1[j];
965     }
966 
967     /* B-part */
968     for (j = 0; j < countB; j++) {
969       if (garray[bjj[j]] > rstart) {
970         if (reuse == MAT_INITIAL_MATRIX) {
971           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
972           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
973         }
974         val[jj++] = v2[j];
975       }
976     }
977     irow++;
978   }
979   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
980   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
981   PetscFunctionReturn(PETSC_SUCCESS);
982 }
983 
984 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
985 {
986   const PetscScalar *av;
987   const PetscInt     M = A->rmap->n;
988   PetscInt64         i;
989   PetscMUMPSInt     *row, *col;
990   Vec                v;
991 
992   PetscFunctionBegin;
993   PetscCall(MatDiagonalGetDiagonal(A, &v));
994   PetscCall(VecGetArrayRead(v, &av));
995   if (reuse == MAT_INITIAL_MATRIX) {
996     PetscCall(PetscMalloc2(M, &row, M, &col));
997     for (i = 0; i < M; i++) {
998       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
999       col[i] = row[i];
1000     }
1001     mumps->val = (PetscScalar *)av;
1002     mumps->irn = row;
1003     mumps->jcn = col;
1004     mumps->nnz = M;
1005   } else PetscCall(PetscArraycpy(mumps->val, av, M));
1006   PetscCall(VecRestoreArrayRead(v, &av));
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1011 {
1012   PetscScalar   *v;
1013   const PetscInt m = A->rmap->n, N = A->cmap->N;
1014   PetscInt       lda;
1015   PetscInt64     i, j;
1016   PetscMUMPSInt *row, *col;
1017 
1018   PetscFunctionBegin;
1019   PetscCall(MatDenseGetArray(A, &v));
1020   PetscCall(MatDenseGetLDA(A, &lda));
1021   if (reuse == MAT_INITIAL_MATRIX) {
1022     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1023     for (i = 0; i < m; i++) {
1024       col[i] = 0;
1025       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1026     }
1027     for (j = 1; j < N; j++) {
1028       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1029       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1030     }
1031     if (lda == m) mumps->val = v;
1032     else {
1033       PetscCall(PetscMalloc1(m * N, &mumps->val));
1034       mumps->val_alloc = mumps->val;
1035       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1036     }
1037     mumps->irn = row;
1038     mumps->jcn = col;
1039     mumps->nnz = m * N;
1040   } else {
1041     if (lda == m && !mumps->nest_vals) mumps->val = v;
1042     else {
1043       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1044     }
1045   }
1046   PetscCall(MatDenseRestoreArray(A, &v));
1047   PetscFunctionReturn(PETSC_SUCCESS);
1048 }
1049 
1050 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1051 {
1052   Mat     **mats;
1053   PetscInt  nr, nc;
1054   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1055 
1056   PetscFunctionBegin;
1057   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1058   if (reuse == MAT_INITIAL_MATRIX) {
1059     PetscMUMPSInt *irns, *jcns;
1060     PetscScalar   *vals;
1061     PetscInt64     totnnz, cumnnz, maxnnz;
1062     PetscInt      *pjcns_w;
1063     IS            *rows, *cols;
1064     PetscInt     **rows_idx, **cols_idx;
1065 
1066     cumnnz = 0;
1067     maxnnz = 0;
1068     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1069     for (PetscInt r = 0; r < nr; r++) {
1070       for (PetscInt c = 0; c < nc; c++) {
1071         Mat sub = mats[r][c];
1072 
1073         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1074         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1075         if (sub) {
1076           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1077           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
1078           MatInfo   info;
1079 
1080           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1081           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1082           else {
1083             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1084             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1085           }
1086           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1087           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1088           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1089           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1090           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1091           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1092           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1093           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1094 
1095           if (chol) {
1096             if (r == c) {
1097               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1098               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1099               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1100               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1101               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1102               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1103             } else {
1104               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1105               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1106               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1107               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1108               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1109               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1110             }
1111           } else {
1112             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1113             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1114             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1115             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1116             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1117             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1118           }
1119           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1120           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1121           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1122           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1123           maxnnz = PetscMax(maxnnz, info.nz_used);
1124         }
1125       }
1126     }
1127 
1128     /* Allocate total COO */
1129     totnnz = cumnnz;
1130     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1131     PetscCall(PetscMalloc1(totnnz, &vals));
1132 
1133     /* Handle rows and column maps
1134        We directly map rows and use an SF for the columns */
1135     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1136     PetscCall(MatNestGetISs(A, rows, cols));
1137     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1138     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1139     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1140     else (void)maxnnz;
1141 
1142     cumnnz = 0;
1143     for (PetscInt r = 0; r < nr; r++) {
1144       for (PetscInt c = 0; c < nc; c++) {
1145         Mat             sub  = mats[r][c];
1146         const PetscInt *ridx = rows_idx[r];
1147         const PetscInt *cidx = cols_idx[c];
1148         PetscInt        rst;
1149         PetscSF         csf;
1150         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1151         PetscLayout     cmap;
1152 
1153         mumps->nest_vals_start[r * nc + c] = cumnnz;
1154         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1155 
1156         /* Extract inner blocks if needed */
1157         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1158         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1159         else {
1160           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1161           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1162         }
1163         swap = (PetscBool)(isTrans || isHTrans);
1164 
1165         /* Get column layout to map off-process columns */
1166         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1167 
1168         /* Get row start to map on-process rows */
1169         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1170 
1171         /* Directly use the mumps datastructure and use C ordering for now */
1172         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1173 
1174         /* Swap the role of rows and columns indices for transposed blocks
1175            since we need values with global final ordering */
1176         if (swap) {
1177           cidx = rows_idx[r];
1178           ridx = cols_idx[c];
1179         }
1180 
1181         /* Communicate column indices
1182            This could have been done with a single SF but it would have complicated the code a lot.
1183            But since we do it only once, we pay the price of setting up an SF for each block */
1184         if (PetscDefined(USE_64BIT_INDICES)) {
1185           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1186         } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
1187         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1188         PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1189         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1190         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1191         PetscCall(PetscSFDestroy(&csf));
1192 
1193         /* Import indices: use direct map for rows and mapped indices for columns */
1194         if (swap) {
1195           for (PetscInt k = 0; k < mumps->nnz; k++) {
1196             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1197             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1198           }
1199         } else {
1200           for (PetscInt k = 0; k < mumps->nnz; k++) {
1201             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1202             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1203           }
1204         }
1205 
1206         /* Import values to full COO */
1207         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1208         if (isHTrans) { /* conjugate the entries */
1209           PetscScalar *v = vals + cumnnz;
1210           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
1211         }
1212 
1213         /* Shift new starting point and sanity check */
1214         cumnnz += mumps->nnz;
1215         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1216 
1217         /* Free scratch memory */
1218         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1219         PetscCall(PetscFree(mumps->val_alloc));
1220         mumps->val = NULL;
1221         mumps->nnz = 0;
1222       }
1223     }
1224     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1225     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1226     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1227     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1228     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1229     mumps->nest_vals_start[nr * nc] = cumnnz;
1230 
1231     /* Set pointers for final MUMPS data structure */
1232     mumps->nest_vals = vals;
1233     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1234     mumps->val       = vals;
1235     mumps->irn       = irns;
1236     mumps->jcn       = jcns;
1237     mumps->nnz       = cumnnz;
1238   } else {
1239     PetscScalar *oval = mumps->nest_vals;
1240     for (PetscInt r = 0; r < nr; r++) {
1241       for (PetscInt c = 0; c < nc; c++) {
1242         PetscBool isTrans, isHTrans = PETSC_FALSE;
1243         Mat       sub  = mats[r][c];
1244         PetscInt  midx = r * nc + c;
1245 
1246         if (!mumps->nest_convert_to_triples[midx]) continue;
1247         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1248         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1249         else {
1250           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1251           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1252         }
1253         mumps->val = oval + mumps->nest_vals_start[midx];
1254         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1255         if (isHTrans) {
1256           PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1257           for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1258         }
1259       }
1260     }
1261     mumps->val = oval;
1262   }
1263   PetscFunctionReturn(PETSC_SUCCESS);
1264 }
1265 
1266 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1267 {
1268   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1272   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1273   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1274   PetscCall(VecDestroy(&mumps->b_seq));
1275   PetscCall(VecDestroy(&mumps->x_seq));
1276   PetscCall(PetscFree(mumps->id.perm_in));
1277   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1278   PetscCall(PetscFree(mumps->val_alloc));
1279   PetscCall(PetscFree(mumps->info));
1280   PetscCall(PetscFree(mumps->ICNTL_pre));
1281   PetscCall(PetscFree(mumps->CNTL_pre));
1282   PetscCall(MatMumpsResetSchur_Private(mumps));
1283   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1284     mumps->id.job = JOB_END;
1285     PetscMUMPS_c(mumps);
1286     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1287     if (mumps->mumps_comm != MPI_COMM_NULL) {
1288       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1289       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1290     }
1291   }
1292 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1293   if (mumps->use_petsc_omp_support) {
1294     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1295     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1296     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1297   }
1298 #endif
1299   PetscCall(PetscFree(mumps->ia_alloc));
1300   PetscCall(PetscFree(mumps->ja_alloc));
1301   PetscCall(PetscFree(mumps->recvcount));
1302   PetscCall(PetscFree(mumps->reqs));
1303   PetscCall(PetscFree(mumps->irhs_loc));
1304   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1305   PetscCall(PetscFree(mumps->nest_vals));
1306   PetscCall(PetscFree(A->data));
1307 
1308   /* clear composed functions */
1309   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1310   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1311   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1312   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1313   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1314   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1315   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1316   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1317   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1318   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1321   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1322   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1323   PetscFunctionReturn(PETSC_SUCCESS);
1324 }
1325 
1326 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1327 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1328 {
1329   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1330   const PetscMPIInt ompsize = mumps->omp_comm_size;
1331   PetscInt          i, m, M, rstart;
1332 
1333   PetscFunctionBegin;
1334   PetscCall(MatGetSize(A, &M, NULL));
1335   PetscCall(MatGetLocalSize(A, &m, NULL));
1336   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1337   if (ompsize == 1) {
1338     if (!mumps->irhs_loc) {
1339       mumps->nloc_rhs = m;
1340       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1341       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1342       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1343     }
1344     mumps->id.rhs_loc = (MumpsScalar *)array;
1345   } else {
1346 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1347     const PetscInt *ranges;
1348     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1349     MPI_Group       petsc_group, omp_group;
1350     PetscScalar    *recvbuf = NULL;
1351 
1352     if (mumps->is_omp_master) {
1353       /* Lazily initialize the omp stuff for distributed rhs */
1354       if (!mumps->irhs_loc) {
1355         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1356         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1357         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1358         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1359         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1360         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1361 
1362         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1363         mumps->nloc_rhs = 0;
1364         PetscCall(MatGetOwnershipRanges(A, &ranges));
1365         for (j = 0; j < ompsize; j++) {
1366           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1367           mumps->nloc_rhs += mumps->rhs_nrow[j];
1368         }
1369         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1370         for (j = k = 0; j < ompsize; j++) {
1371           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1372         }
1373 
1374         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1375         PetscCallMPI(MPI_Group_free(&petsc_group));
1376         PetscCallMPI(MPI_Group_free(&omp_group));
1377       }
1378 
1379       /* Realloc buffers when current nrhs is bigger than what we have met */
1380       if (nrhs > mumps->max_nrhs) {
1381         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1382         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1383         mumps->max_nrhs = nrhs;
1384       }
1385 
1386       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1387       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1388       mumps->rhs_disps[0] = 0;
1389       for (j = 1; j < ompsize; j++) {
1390         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1391         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1392       }
1393       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1394     }
1395 
1396     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1397     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1398 
1399     if (mumps->is_omp_master) {
1400       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1401         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1402         for (j = 0; j < ompsize; j++) {
1403           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1404           dst                    = dstbase;
1405           for (i = 0; i < nrhs; i++) {
1406             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1407             src += mumps->rhs_nrow[j];
1408             dst += mumps->nloc_rhs;
1409           }
1410           dstbase += mumps->rhs_nrow[j];
1411         }
1412       }
1413       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1414     }
1415 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1416   }
1417   mumps->id.nrhs     = nrhs;
1418   mumps->id.nloc_rhs = mumps->nloc_rhs;
1419   mumps->id.lrhs_loc = mumps->nloc_rhs;
1420   mumps->id.irhs_loc = mumps->irhs_loc;
1421   PetscFunctionReturn(PETSC_SUCCESS);
1422 }
1423 
1424 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1425 {
1426   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1427   const PetscScalar *rarray = NULL;
1428   PetscScalar       *array;
1429   IS                 is_iden, is_petsc;
1430   PetscInt           i;
1431   PetscBool          second_solve = PETSC_FALSE;
1432   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1433 
1434   PetscFunctionBegin;
1435   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1436                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1437                                    &cite1));
1438   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1439                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1440                                    &cite2));
1441 
1442   if (A->factorerrortype) {
1443     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1444     PetscCall(VecSetInf(x));
1445     PetscFunctionReturn(PETSC_SUCCESS);
1446   }
1447 
1448   mumps->id.nrhs = 1;
1449   if (mumps->petsc_size > 1) {
1450     if (mumps->ICNTL20 == 10) {
1451       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1452       PetscCall(VecGetArrayRead(b, &rarray));
1453       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1454     } else {
1455       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1456       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1457       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1458       if (!mumps->myid) {
1459         PetscCall(VecGetArray(mumps->b_seq, &array));
1460         mumps->id.rhs = (MumpsScalar *)array;
1461       }
1462     }
1463   } else {                   /* petsc_size == 1 */
1464     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1465     PetscCall(VecCopy(b, x));
1466     PetscCall(VecGetArray(x, &array));
1467     mumps->id.rhs = (MumpsScalar *)array;
1468   }
1469 
1470   /*
1471      handle condensation step of Schur complement (if any)
1472      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1473      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1474      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1475      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1476   */
1477   if (mumps->id.size_schur > 0) {
1478     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1479     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1480       second_solve = PETSC_TRUE;
1481       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1482       mumps->id.ICNTL(26) = 1; /* condensation phase */
1483     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1484   }
1485   /* solve phase */
1486   mumps->id.job = JOB_SOLVE;
1487   PetscMUMPS_c(mumps);
1488   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1489 
1490   /* handle expansion step of Schur complement (if any) */
1491   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1492   else if (mumps->id.ICNTL(26) == 1) {
1493     PetscCall(MatMumpsSolveSchur_Private(A));
1494     for (i = 0; i < mumps->id.size_schur; ++i) {
1495 #if !defined(PETSC_USE_COMPLEX)
1496       PetscScalar val = mumps->id.redrhs[i];
1497 #else
1498       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1499 #endif
1500       array[mumps->id.listvar_schur[i] - 1] = val;
1501     }
1502   }
1503 
1504   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1505     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1506       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1507       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1508     }
1509     if (!mumps->scat_sol) { /* create scatter scat_sol */
1510       PetscInt *isol2_loc = NULL;
1511       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1512       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1513       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1514       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1515       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1516       PetscCall(ISDestroy(&is_iden));
1517       PetscCall(ISDestroy(&is_petsc));
1518       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1519     }
1520 
1521     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1522     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1523   }
1524 
1525   if (mumps->petsc_size > 1) {
1526     if (mumps->ICNTL20 == 10) {
1527       PetscCall(VecRestoreArrayRead(b, &rarray));
1528     } else if (!mumps->myid) {
1529       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1530     }
1531   } else PetscCall(VecRestoreArray(x, &array));
1532 
1533   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1534   PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536 
1537 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1538 {
1539   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1540   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1541 
1542   PetscFunctionBegin;
1543   mumps->id.ICNTL(9) = 0;
1544   PetscCall(MatSolve_MUMPS(A, b, x));
1545   mumps->id.ICNTL(9) = value;
1546   PetscFunctionReturn(PETSC_SUCCESS);
1547 }
1548 
1549 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1550 {
1551   Mat                Bt = NULL;
1552   PetscBool          denseX, denseB, flg, flgT;
1553   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1554   PetscInt           i, nrhs, M;
1555   PetscScalar       *array;
1556   const PetscScalar *rbray;
1557   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1558   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1559   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1560   IS                 is_to, is_from;
1561   PetscInt           k, proc, j, m, myrstart;
1562   const PetscInt    *rstart;
1563   Vec                v_mpi, msol_loc;
1564   VecScatter         scat_sol;
1565   Vec                b_seq;
1566   VecScatter         scat_rhs;
1567   PetscScalar       *aa;
1568   PetscInt           spnr, *ia, *ja;
1569   Mat_MPIAIJ        *b = NULL;
1570 
1571   PetscFunctionBegin;
1572   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1573   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1574 
1575   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1576   if (denseB) {
1577     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1578     mumps->id.ICNTL(20) = 0; /* dense RHS */
1579   } else {                   /* sparse B */
1580     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1581     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1582     if (flgT) { /* input B is transpose of actual RHS matrix,
1583                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1584       PetscCall(MatTransposeGetMat(B, &Bt));
1585     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1586     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1587   }
1588 
1589   PetscCall(MatGetSize(B, &M, &nrhs));
1590   mumps->id.nrhs = nrhs;
1591   mumps->id.lrhs = M;
1592   mumps->id.rhs  = NULL;
1593 
1594   if (mumps->petsc_size == 1) {
1595     PetscScalar *aa;
1596     PetscInt     spnr, *ia, *ja;
1597     PetscBool    second_solve = PETSC_FALSE;
1598 
1599     PetscCall(MatDenseGetArray(X, &array));
1600     mumps->id.rhs = (MumpsScalar *)array;
1601 
1602     if (denseB) {
1603       /* copy B to X */
1604       PetscCall(MatDenseGetArrayRead(B, &rbray));
1605       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1606       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1607     } else { /* sparse B */
1608       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1609       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1610       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1611       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1612       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1613     }
1614     /* handle condensation step of Schur complement (if any) */
1615     if (mumps->id.size_schur > 0) {
1616       PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1617       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1618         second_solve = PETSC_TRUE;
1619         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1620         mumps->id.ICNTL(26) = 1; /* condensation phase */
1621       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1622     }
1623     /* solve phase */
1624     mumps->id.job = JOB_SOLVE;
1625     PetscMUMPS_c(mumps);
1626     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1627 
1628     /* handle expansion step of Schur complement (if any) */
1629     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1630     else if (mumps->id.ICNTL(26) == 1) {
1631       PetscCall(MatMumpsSolveSchur_Private(A));
1632       for (j = 0; j < nrhs; ++j)
1633         for (i = 0; i < mumps->id.size_schur; ++i) {
1634 #if !defined(PETSC_USE_COMPLEX)
1635           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1636 #else
1637           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1638 #endif
1639           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1640         }
1641     }
1642     if (!denseB) { /* sparse B */
1643       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1644       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1645       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1646     }
1647     PetscCall(MatDenseRestoreArray(X, &array));
1648     PetscFunctionReturn(PETSC_SUCCESS);
1649   }
1650 
1651   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1652   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1653 
1654   /* create msol_loc to hold mumps local solution */
1655   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1656   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1657 
1658   lsol_loc  = mumps->id.lsol_loc;
1659   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1660   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1661   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1662   mumps->id.isol_loc = isol_loc;
1663 
1664   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1665 
1666   if (denseB) {
1667     if (mumps->ICNTL20 == 10) {
1668       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1669       PetscCall(MatDenseGetArrayRead(B, &rbray));
1670       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1671       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1672       PetscCall(MatGetLocalSize(B, &m, NULL));
1673       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1674     } else {
1675       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1676       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1677         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1678         0, re-arrange B into desired order, which is a local operation.
1679       */
1680 
1681       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1682       /* wrap dense rhs matrix B into a vector v_mpi */
1683       PetscCall(MatGetLocalSize(B, &m, NULL));
1684       PetscCall(MatDenseGetArray(B, &bray));
1685       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1686       PetscCall(MatDenseRestoreArray(B, &bray));
1687 
1688       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1689       if (!mumps->myid) {
1690         PetscInt *idx;
1691         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1692         PetscCall(PetscMalloc1(nrhs * M, &idx));
1693         PetscCall(MatGetOwnershipRanges(B, &rstart));
1694         k = 0;
1695         for (proc = 0; proc < mumps->petsc_size; proc++) {
1696           for (j = 0; j < nrhs; j++) {
1697             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1698           }
1699         }
1700 
1701         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1702         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1703         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1704       } else {
1705         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1706         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1707         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1708       }
1709       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1710       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1711       PetscCall(ISDestroy(&is_to));
1712       PetscCall(ISDestroy(&is_from));
1713       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1714 
1715       if (!mumps->myid) { /* define rhs on the host */
1716         PetscCall(VecGetArray(b_seq, &bray));
1717         mumps->id.rhs = (MumpsScalar *)bray;
1718         PetscCall(VecRestoreArray(b_seq, &bray));
1719       }
1720     }
1721   } else { /* sparse B */
1722     b = (Mat_MPIAIJ *)Bt->data;
1723 
1724     /* wrap dense X into a vector v_mpi */
1725     PetscCall(MatGetLocalSize(X, &m, NULL));
1726     PetscCall(MatDenseGetArray(X, &bray));
1727     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1728     PetscCall(MatDenseRestoreArray(X, &bray));
1729 
1730     if (!mumps->myid) {
1731       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1732       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1733       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1734       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1735       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1736     } else {
1737       mumps->id.irhs_ptr    = NULL;
1738       mumps->id.irhs_sparse = NULL;
1739       mumps->id.nz_rhs      = 0;
1740       mumps->id.rhs_sparse  = NULL;
1741     }
1742   }
1743 
1744   /* solve phase */
1745   mumps->id.job = JOB_SOLVE;
1746   PetscMUMPS_c(mumps);
1747   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1748 
1749   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1750   PetscCall(MatDenseGetArray(X, &array));
1751   PetscCall(VecPlaceArray(v_mpi, array));
1752 
1753   /* create scatter scat_sol */
1754   PetscCall(MatGetOwnershipRanges(X, &rstart));
1755   /* iidx: index for scatter mumps solution to petsc X */
1756 
1757   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1758   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1759   for (i = 0; i < lsol_loc; i++) {
1760     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1761 
1762     for (proc = 0; proc < mumps->petsc_size; proc++) {
1763       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1764         myrstart = rstart[proc];
1765         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1766         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1767         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1768         break;
1769       }
1770     }
1771 
1772     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1773   }
1774   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1775   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1776   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1777   PetscCall(ISDestroy(&is_from));
1778   PetscCall(ISDestroy(&is_to));
1779   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1780   PetscCall(MatDenseRestoreArray(X, &array));
1781 
1782   /* free spaces */
1783   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1784   mumps->id.isol_loc = isol_loc_save;
1785 
1786   PetscCall(PetscFree2(sol_loc, isol_loc));
1787   PetscCall(PetscFree(idxx));
1788   PetscCall(VecDestroy(&msol_loc));
1789   PetscCall(VecDestroy(&v_mpi));
1790   if (!denseB) {
1791     if (!mumps->myid) {
1792       b = (Mat_MPIAIJ *)Bt->data;
1793       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1794       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1795       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1796     }
1797   } else {
1798     if (mumps->ICNTL20 == 0) {
1799       PetscCall(VecDestroy(&b_seq));
1800       PetscCall(VecScatterDestroy(&scat_rhs));
1801     }
1802   }
1803   PetscCall(VecScatterDestroy(&scat_sol));
1804   PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1805   PetscFunctionReturn(PETSC_SUCCESS);
1806 }
1807 
1808 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1809 {
1810   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1811   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1812 
1813   PetscFunctionBegin;
1814   mumps->id.ICNTL(9) = 0;
1815   PetscCall(MatMatSolve_MUMPS(A, B, X));
1816   mumps->id.ICNTL(9) = value;
1817   PetscFunctionReturn(PETSC_SUCCESS);
1818 }
1819 
1820 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1821 {
1822   PetscBool flg;
1823   Mat       B;
1824 
1825   PetscFunctionBegin;
1826   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1827   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1828 
1829   /* Create B=Bt^T that uses Bt's data structure */
1830   PetscCall(MatCreateTranspose(Bt, &B));
1831 
1832   PetscCall(MatMatSolve_MUMPS(A, B, X));
1833   PetscCall(MatDestroy(&B));
1834   PetscFunctionReturn(PETSC_SUCCESS);
1835 }
1836 
1837 #if !defined(PETSC_USE_COMPLEX)
1838 /*
1839   input:
1840    F:        numeric factor
1841   output:
1842    nneg:     total number of negative pivots
1843    nzero:    total number of zero pivots
1844    npos:     (global dimension of F) - nneg - nzero
1845 */
1846 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1847 {
1848   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1849   PetscMPIInt size;
1850 
1851   PetscFunctionBegin;
1852   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1853   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1854   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1855 
1856   if (nneg) *nneg = mumps->id.INFOG(12);
1857   if (nzero || npos) {
1858     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1859     if (nzero) *nzero = mumps->id.INFOG(28);
1860     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1861   }
1862   PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864 #endif
1865 
1866 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1867 {
1868   PetscInt       i, nreqs;
1869   PetscMUMPSInt *irn, *jcn;
1870   PetscMPIInt    count;
1871   PetscInt64     totnnz, remain;
1872   const PetscInt osize = mumps->omp_comm_size;
1873   PetscScalar   *val;
1874 
1875   PetscFunctionBegin;
1876   if (osize > 1) {
1877     if (reuse == MAT_INITIAL_MATRIX) {
1878       /* master first gathers counts of nonzeros to receive */
1879       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1880       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1881 
1882       /* Then each computes number of send/recvs */
1883       if (mumps->is_omp_master) {
1884         /* Start from 1 since self communication is not done in MPI */
1885         nreqs = 0;
1886         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1887       } else {
1888         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1889       }
1890       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1891 
1892       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1893          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1894          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1895          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1896        */
1897       nreqs = 0; /* counter for actual send/recvs */
1898       if (mumps->is_omp_master) {
1899         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1900         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1901         PetscCall(PetscMalloc1(totnnz, &val));
1902 
1903         /* Self communication */
1904         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1905         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1906         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1907 
1908         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1909         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1910         PetscCall(PetscFree(mumps->val_alloc));
1911         mumps->nnz = totnnz;
1912         mumps->irn = irn;
1913         mumps->jcn = jcn;
1914         mumps->val = mumps->val_alloc = val;
1915 
1916         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1917         jcn += mumps->recvcount[0];
1918         val += mumps->recvcount[0];
1919 
1920         /* Remote communication */
1921         for (i = 1; i < osize; i++) {
1922           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1923           remain = mumps->recvcount[i] - count;
1924           while (count > 0) {
1925             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1926             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1927             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1928             irn += count;
1929             jcn += count;
1930             val += count;
1931             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1932             remain -= count;
1933           }
1934         }
1935       } else {
1936         irn    = mumps->irn;
1937         jcn    = mumps->jcn;
1938         val    = mumps->val;
1939         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1940         remain = mumps->nnz - count;
1941         while (count > 0) {
1942           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1943           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1944           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1945           irn += count;
1946           jcn += count;
1947           val += count;
1948           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1949           remain -= count;
1950         }
1951       }
1952     } else {
1953       nreqs = 0;
1954       if (mumps->is_omp_master) {
1955         val = mumps->val + mumps->recvcount[0];
1956         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1957           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1958           remain = mumps->recvcount[i] - count;
1959           while (count > 0) {
1960             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1961             val += count;
1962             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1963             remain -= count;
1964           }
1965         }
1966       } else {
1967         val    = mumps->val;
1968         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1969         remain = mumps->nnz - count;
1970         while (count > 0) {
1971           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1972           val += count;
1973           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1974           remain -= count;
1975         }
1976       }
1977     }
1978     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1979     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1980   }
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1985 {
1986   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1987   PetscBool  isMPIAIJ;
1988 
1989   PetscFunctionBegin;
1990   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1991     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1992     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1993     PetscFunctionReturn(PETSC_SUCCESS);
1994   }
1995 
1996   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1997   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1998 
1999   /* numerical factorization phase */
2000   mumps->id.job = JOB_FACTNUMERIC;
2001   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2002     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2003   } else {
2004     mumps->id.a_loc = (MumpsScalar *)mumps->val;
2005   }
2006   PetscMUMPS_c(mumps);
2007   if (mumps->id.INFOG(1) < 0) {
2008     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2009     if (mumps->id.INFOG(1) == -10) {
2010       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2011       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2012     } else if (mumps->id.INFOG(1) == -13) {
2013       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2014       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2015     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2016       PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2017       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2018     } else {
2019       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2020       F->factorerrortype = MAT_FACTOR_OTHER;
2021     }
2022   }
2023   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2024 
2025   F->assembled = PETSC_TRUE;
2026 
2027   if (F->schur) { /* reset Schur status to unfactored */
2028 #if defined(PETSC_HAVE_CUDA)
2029     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2030 #endif
2031     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2032       mumps->id.ICNTL(19) = 2;
2033       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2034     }
2035     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2036   }
2037 
2038   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2039   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2040 
2041   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2042   if (mumps->petsc_size > 1) {
2043     PetscInt     lsol_loc;
2044     PetscScalar *sol_loc;
2045 
2046     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2047 
2048     /* distributed solution; Create x_seq=sol_loc for repeated use */
2049     if (mumps->x_seq) {
2050       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2051       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2052       PetscCall(VecDestroy(&mumps->x_seq));
2053     }
2054     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2055     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2056     mumps->id.lsol_loc = lsol_loc;
2057     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2058     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2059   }
2060   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063 
2064 /* Sets MUMPS options from the options database */
2065 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2066 {
2067   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2068   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2069   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2070   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2071   MumpsScalar  *arr;
2072 
2073   PetscFunctionBegin;
2074   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2075   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2076     PetscInt nthreads   = 0;
2077     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2078     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2079 
2080     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2081     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2082     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2083 
2084     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2085     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2086     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2087     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2088     if (mumps->use_petsc_omp_support) {
2089       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2090                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2091       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2092 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2093       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2094       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2095 #endif
2096     } else {
2097       mumps->omp_comm      = PETSC_COMM_SELF;
2098       mumps->mumps_comm    = mumps->petsc_comm;
2099       mumps->is_omp_master = PETSC_TRUE;
2100     }
2101     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2102     mumps->reqs = NULL;
2103     mumps->tag  = 0;
2104 
2105     if (mumps->mumps_comm != MPI_COMM_NULL) {
2106       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2107         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2108         MPI_Comm comm;
2109         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2110         mumps->mumps_comm = comm;
2111       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2112     }
2113 
2114     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2115     mumps->id.job          = JOB_INIT;
2116     mumps->id.par          = 1; /* host participates factorizaton and solve */
2117     mumps->id.sym          = mumps->sym;
2118 
2119     size          = mumps->id.size_schur;
2120     arr           = mumps->id.schur;
2121     listvar_schur = mumps->id.listvar_schur;
2122     PetscMUMPS_c(mumps);
2123     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2124     /* restore cached ICNTL and CNTL values */
2125     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2126     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2127     PetscCall(PetscFree(mumps->ICNTL_pre));
2128     PetscCall(PetscFree(mumps->CNTL_pre));
2129 
2130     if (schur) {
2131       mumps->id.size_schur    = size;
2132       mumps->id.schur_lld     = size;
2133       mumps->id.schur         = arr;
2134       mumps->id.listvar_schur = listvar_schur;
2135       if (mumps->petsc_size > 1) {
2136         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2137 
2138         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2139         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2140         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2141         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2142       } else {
2143         if (F->factortype == MAT_FACTOR_LU) {
2144           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2145         } else {
2146           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2147         }
2148       }
2149       mumps->id.ICNTL(26) = -1;
2150     }
2151 
2152     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2153        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2154      */
2155     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2156     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2157 
2158     mumps->scat_rhs = NULL;
2159     mumps->scat_sol = NULL;
2160 
2161     /* set PETSc-MUMPS default options - override MUMPS default */
2162     mumps->id.ICNTL(3) = 0;
2163     mumps->id.ICNTL(4) = 0;
2164     if (mumps->petsc_size == 1) {
2165       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2166       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2167     } else {
2168       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2169       mumps->id.ICNTL(21) = 1; /* distributed solution */
2170     }
2171   }
2172   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2173   if (flg) mumps->id.ICNTL(1) = icntl;
2174   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2175   if (flg) mumps->id.ICNTL(2) = icntl;
2176   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2177   if (flg) mumps->id.ICNTL(3) = icntl;
2178 
2179   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2180   if (flg) mumps->id.ICNTL(4) = icntl;
2181   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2182 
2183   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2184   if (flg) mumps->id.ICNTL(6) = icntl;
2185 
2186   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2187   if (flg) {
2188     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2189     mumps->id.ICNTL(7) = icntl;
2190   }
2191 
2192   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2193   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2194   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2195   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2196   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2197   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2198   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2199   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2200   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2201   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2202   if (flg) {
2203     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2204     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2205   }
2206   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2207   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2208     PetscCall(MatDestroy(&F->schur));
2209     PetscCall(MatMumpsResetSchur_Private(mumps));
2210   }
2211 
2212   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2213      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2214      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2215      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2216      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2217      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2218    */
2219 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE)
2220   mumps->ICNTL20 = 10;
2221 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2222   mumps->ICNTL20     = 0; /* Centralized dense RHS*/
2223 #else
2224   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
2225 #endif
2226   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2227   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2228 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2229   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2230 #endif
2231   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2232 
2233   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2234   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2235   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2236   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2237 
2238   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2239   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2240   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2241   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2242   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2243   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2244   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2245   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2246   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2247   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2248   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2249   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2250   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2251 
2252   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2253   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2254   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2255   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2256   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2257   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2258 
2259   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2260 
2261   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2262   if (ninfo) {
2263     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2264     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2265     mumps->ninfo = ninfo;
2266     for (i = 0; i < ninfo; i++) {
2267       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2268       mumps->info[i] = info[i];
2269     }
2270   }
2271   PetscOptionsEnd();
2272   PetscFunctionReturn(PETSC_SUCCESS);
2273 }
2274 
2275 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2276 {
2277   PetscFunctionBegin;
2278   if (mumps->id.INFOG(1) < 0) {
2279     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2280     if (mumps->id.INFOG(1) == -6) {
2281       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2282       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2283     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2284       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2285       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2286     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
2287       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2288     } else {
2289       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2290       F->factorerrortype = MAT_FACTOR_OTHER;
2291     }
2292   }
2293   PetscFunctionReturn(PETSC_SUCCESS);
2294 }
2295 
2296 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2297 {
2298   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2299   Vec            b;
2300   const PetscInt M = A->rmap->N;
2301 
2302   PetscFunctionBegin;
2303   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2304     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2305     PetscFunctionReturn(PETSC_SUCCESS);
2306   }
2307 
2308   /* Set MUMPS options from the options database */
2309   PetscCall(MatSetFromOptions_MUMPS(F, A));
2310 
2311   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2312   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2313 
2314   /* analysis phase */
2315   mumps->id.job = JOB_FACTSYMBOLIC;
2316   mumps->id.n   = M;
2317   switch (mumps->id.ICNTL(18)) {
2318   case 0: /* centralized assembled matrix input */
2319     if (!mumps->myid) {
2320       mumps->id.nnz = mumps->nnz;
2321       mumps->id.irn = mumps->irn;
2322       mumps->id.jcn = mumps->jcn;
2323       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2324       if (r) {
2325         mumps->id.ICNTL(7) = 1;
2326         if (!mumps->myid) {
2327           const PetscInt *idx;
2328           PetscInt        i;
2329 
2330           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2331           PetscCall(ISGetIndices(r, &idx));
2332           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2333           PetscCall(ISRestoreIndices(r, &idx));
2334         }
2335       }
2336     }
2337     break;
2338   case 3: /* distributed assembled matrix input (size>1) */
2339     mumps->id.nnz_loc = mumps->nnz;
2340     mumps->id.irn_loc = mumps->irn;
2341     mumps->id.jcn_loc = mumps->jcn;
2342     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2343     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2344       PetscCall(MatCreateVecs(A, NULL, &b));
2345       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2346       PetscCall(VecDestroy(&b));
2347     }
2348     break;
2349   }
2350   PetscMUMPS_c(mumps);
2351   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2352 
2353   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2354   F->ops->solve             = MatSolve_MUMPS;
2355   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2356   F->ops->matsolve          = MatMatSolve_MUMPS;
2357   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2358   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2359 
2360   mumps->matstruc = SAME_NONZERO_PATTERN;
2361   PetscFunctionReturn(PETSC_SUCCESS);
2362 }
2363 
2364 /* Note the Petsc r and c permutations are ignored */
2365 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2366 {
2367   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2368   Vec            b;
2369   const PetscInt M = A->rmap->N;
2370 
2371   PetscFunctionBegin;
2372   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2373     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2374     PetscFunctionReturn(PETSC_SUCCESS);
2375   }
2376 
2377   /* Set MUMPS options from the options database */
2378   PetscCall(MatSetFromOptions_MUMPS(F, A));
2379 
2380   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2381   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2382 
2383   /* analysis phase */
2384   mumps->id.job = JOB_FACTSYMBOLIC;
2385   mumps->id.n   = M;
2386   switch (mumps->id.ICNTL(18)) {
2387   case 0: /* centralized assembled matrix input */
2388     if (!mumps->myid) {
2389       mumps->id.nnz = mumps->nnz;
2390       mumps->id.irn = mumps->irn;
2391       mumps->id.jcn = mumps->jcn;
2392       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2393     }
2394     break;
2395   case 3: /* distributed assembled matrix input (size>1) */
2396     mumps->id.nnz_loc = mumps->nnz;
2397     mumps->id.irn_loc = mumps->irn;
2398     mumps->id.jcn_loc = mumps->jcn;
2399     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2400     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2401       PetscCall(MatCreateVecs(A, NULL, &b));
2402       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2403       PetscCall(VecDestroy(&b));
2404     }
2405     break;
2406   }
2407   PetscMUMPS_c(mumps);
2408   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2409 
2410   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2411   F->ops->solve             = MatSolve_MUMPS;
2412   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2413   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2414 
2415   mumps->matstruc = SAME_NONZERO_PATTERN;
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
2419 /* Note the Petsc r permutation and factor info are ignored */
2420 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2421 {
2422   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2423   Vec            b;
2424   const PetscInt M = A->rmap->N;
2425 
2426   PetscFunctionBegin;
2427   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2428     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2429     PetscFunctionReturn(PETSC_SUCCESS);
2430   }
2431 
2432   /* Set MUMPS options from the options database */
2433   PetscCall(MatSetFromOptions_MUMPS(F, A));
2434 
2435   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2436   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2437 
2438   /* analysis phase */
2439   mumps->id.job = JOB_FACTSYMBOLIC;
2440   mumps->id.n   = M;
2441   switch (mumps->id.ICNTL(18)) {
2442   case 0: /* centralized assembled matrix input */
2443     if (!mumps->myid) {
2444       mumps->id.nnz = mumps->nnz;
2445       mumps->id.irn = mumps->irn;
2446       mumps->id.jcn = mumps->jcn;
2447       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2448     }
2449     break;
2450   case 3: /* distributed assembled matrix input (size>1) */
2451     mumps->id.nnz_loc = mumps->nnz;
2452     mumps->id.irn_loc = mumps->irn;
2453     mumps->id.jcn_loc = mumps->jcn;
2454     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2455     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2456       PetscCall(MatCreateVecs(A, NULL, &b));
2457       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2458       PetscCall(VecDestroy(&b));
2459     }
2460     break;
2461   }
2462   PetscMUMPS_c(mumps);
2463   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2464 
2465   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2466   F->ops->solve                 = MatSolve_MUMPS;
2467   F->ops->solvetranspose        = MatSolve_MUMPS;
2468   F->ops->matsolve              = MatMatSolve_MUMPS;
2469   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2470   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2471 #if defined(PETSC_USE_COMPLEX)
2472   F->ops->getinertia = NULL;
2473 #else
2474   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2475 #endif
2476 
2477   mumps->matstruc = SAME_NONZERO_PATTERN;
2478   PetscFunctionReturn(PETSC_SUCCESS);
2479 }
2480 
2481 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2482 {
2483   PetscBool         iascii;
2484   PetscViewerFormat format;
2485   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2486 
2487   PetscFunctionBegin;
2488   /* check if matrix is mumps type */
2489   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2490 
2491   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2492   if (iascii) {
2493     PetscCall(PetscViewerGetFormat(viewer, &format));
2494     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2495       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2496       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2497         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2498         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2499         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2500         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2501         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2502         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2503         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2504         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2505         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2506         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2507         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2508         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2509         if (mumps->id.ICNTL(11) > 0) {
2510           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2511           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2512           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2513           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2514           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2515           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2516         }
2517         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2518         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2519         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2520         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2521         /* ICNTL(15-17) not used */
2522         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2523         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2524         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2525         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2526         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2527         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2528 
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2530         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2533         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2534         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2535 
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2537         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2540         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2541         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2543 
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2550 
2551         /* information local to each processor */
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2553         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2554         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2555         PetscCall(PetscViewerFlush(viewer));
2556         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2557         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2558         PetscCall(PetscViewerFlush(viewer));
2559         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2560         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2561         PetscCall(PetscViewerFlush(viewer));
2562 
2563         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2564         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2565         PetscCall(PetscViewerFlush(viewer));
2566 
2567         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2568         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2569         PetscCall(PetscViewerFlush(viewer));
2570 
2571         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2572         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2573         PetscCall(PetscViewerFlush(viewer));
2574 
2575         if (mumps->ninfo && mumps->ninfo <= 80) {
2576           PetscInt i;
2577           for (i = 0; i < mumps->ninfo; i++) {
2578             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2579             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2580             PetscCall(PetscViewerFlush(viewer));
2581           }
2582         }
2583         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2584       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2585 
2586       if (mumps->myid == 0) { /* information from the host */
2587         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2588         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2589         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2590         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2591 
2592         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2593         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2594         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2595         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2596         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2597         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2598         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2599         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2600         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2601         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2602         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2603         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2604         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2605         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2606         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2607         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2608         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2609         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2610         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2611         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2612         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2613         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2614         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2615         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2616         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2617         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2618         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2619         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2620         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2621         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2622         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2623         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2624         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2625         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2626       }
2627     }
2628   }
2629   PetscFunctionReturn(PETSC_SUCCESS);
2630 }
2631 
2632 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2633 {
2634   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2635 
2636   PetscFunctionBegin;
2637   info->block_size        = 1.0;
2638   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2639   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2640   info->nz_unneeded       = 0.0;
2641   info->assemblies        = 0.0;
2642   info->mallocs           = 0.0;
2643   info->memory            = 0.0;
2644   info->fill_ratio_given  = 0;
2645   info->fill_ratio_needed = 0;
2646   info->factor_mallocs    = 0;
2647   PetscFunctionReturn(PETSC_SUCCESS);
2648 }
2649 
2650 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2651 {
2652   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2653   const PetscScalar *arr;
2654   const PetscInt    *idxs;
2655   PetscInt           size, i;
2656 
2657   PetscFunctionBegin;
2658   PetscCall(ISGetLocalSize(is, &size));
2659   /* Schur complement matrix */
2660   PetscCall(MatDestroy(&F->schur));
2661   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2662   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2663   mumps->id.schur      = (MumpsScalar *)arr;
2664   mumps->id.size_schur = size;
2665   mumps->id.schur_lld  = size;
2666   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2667   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2668 
2669   /* MUMPS expects Fortran style indices */
2670   PetscCall(PetscFree(mumps->id.listvar_schur));
2671   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2672   PetscCall(ISGetIndices(is, &idxs));
2673   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2674   PetscCall(ISRestoreIndices(is, &idxs));
2675   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2676   mumps->id.ICNTL(26) = -1;
2677   PetscFunctionReturn(PETSC_SUCCESS);
2678 }
2679 
2680 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2681 {
2682   Mat          St;
2683   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2684   PetscScalar *array;
2685 
2686   PetscFunctionBegin;
2687   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2688   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2689   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2690   PetscCall(MatSetType(St, MATDENSE));
2691   PetscCall(MatSetUp(St));
2692   PetscCall(MatDenseGetArray(St, &array));
2693   if (!mumps->sym) {                /* MUMPS always return a full matrix */
2694     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2695       PetscInt i, j, N = mumps->id.size_schur;
2696       for (i = 0; i < N; i++) {
2697         for (j = 0; j < N; j++) {
2698 #if !defined(PETSC_USE_COMPLEX)
2699           PetscScalar val = mumps->id.schur[i * N + j];
2700 #else
2701           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2702 #endif
2703           array[j * N + i] = val;
2704         }
2705       }
2706     } else { /* stored by columns */
2707       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2708     }
2709   } else {                          /* either full or lower-triangular (not packed) */
2710     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2711       PetscInt i, j, N = mumps->id.size_schur;
2712       for (i = 0; i < N; i++) {
2713         for (j = i; j < N; j++) {
2714 #if !defined(PETSC_USE_COMPLEX)
2715           PetscScalar val = mumps->id.schur[i * N + j];
2716 #else
2717           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2718 #endif
2719           array[i * N + j] = array[j * N + i] = val;
2720         }
2721       }
2722     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2723       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2724     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2725       PetscInt i, j, N = mumps->id.size_schur;
2726       for (i = 0; i < N; i++) {
2727         for (j = 0; j < i + 1; j++) {
2728 #if !defined(PETSC_USE_COMPLEX)
2729           PetscScalar val = mumps->id.schur[i * N + j];
2730 #else
2731           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2732 #endif
2733           array[i * N + j] = array[j * N + i] = val;
2734         }
2735       }
2736     }
2737   }
2738   PetscCall(MatDenseRestoreArray(St, &array));
2739   *S = St;
2740   PetscFunctionReturn(PETSC_SUCCESS);
2741 }
2742 
2743 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2744 {
2745   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2746 
2747   PetscFunctionBegin;
2748   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2749     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2750     for (i = 0; i < nICNTL_pre; ++i)
2751       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2752     if (i == nICNTL_pre) {                             /* not already cached */
2753       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2754       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2755       mumps->ICNTL_pre[0]++;
2756     }
2757     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2758     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2759   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2760   PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762 
2763 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2764 {
2765   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2766 
2767   PetscFunctionBegin;
2768   if (mumps->id.job == JOB_NULL) {
2769     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2770     *ival = 0;
2771     for (i = 0; i < nICNTL_pre; ++i) {
2772       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2773     }
2774   } else *ival = mumps->id.ICNTL(icntl);
2775   PetscFunctionReturn(PETSC_SUCCESS);
2776 }
2777 
2778 /*@
2779   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2780 
2781   Logically Collective
2782 
2783   Input Parameters:
2784 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2785 . icntl - index of MUMPS parameter array ICNTL()
2786 - ival  - value of MUMPS ICNTL(icntl)
2787 
2788   Options Database Key:
2789 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2790 
2791   Level: beginner
2792 
2793   References:
2794 .  * - MUMPS Users' Guide
2795 
2796 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2797 @*/
2798 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2799 {
2800   PetscFunctionBegin;
2801   PetscValidType(F, 1);
2802   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2803   PetscValidLogicalCollectiveInt(F, icntl, 2);
2804   PetscValidLogicalCollectiveInt(F, ival, 3);
2805   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2806   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2807   PetscFunctionReturn(PETSC_SUCCESS);
2808 }
2809 
2810 /*@
2811   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2812 
2813   Logically Collective
2814 
2815   Input Parameters:
2816 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2817 - icntl - index of MUMPS parameter array ICNTL()
2818 
2819   Output Parameter:
2820 . ival - value of MUMPS ICNTL(icntl)
2821 
2822   Level: beginner
2823 
2824   References:
2825 .  * - MUMPS Users' Guide
2826 
2827 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2828 @*/
2829 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2830 {
2831   PetscFunctionBegin;
2832   PetscValidType(F, 1);
2833   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2834   PetscValidLogicalCollectiveInt(F, icntl, 2);
2835   PetscAssertPointer(ival, 3);
2836   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2837   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2838   PetscFunctionReturn(PETSC_SUCCESS);
2839 }
2840 
2841 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2842 {
2843   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2844 
2845   PetscFunctionBegin;
2846   if (mumps->id.job == JOB_NULL) {
2847     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2848     for (i = 0; i < nCNTL_pre; ++i)
2849       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2850     if (i == nCNTL_pre) {
2851       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2852       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2853       mumps->CNTL_pre[0]++;
2854     }
2855     mumps->CNTL_pre[1 + 2 * i] = icntl;
2856     mumps->CNTL_pre[2 + 2 * i] = val;
2857   } else mumps->id.CNTL(icntl) = val;
2858   PetscFunctionReturn(PETSC_SUCCESS);
2859 }
2860 
2861 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2862 {
2863   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2864 
2865   PetscFunctionBegin;
2866   if (mumps->id.job == JOB_NULL) {
2867     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2868     *val = 0.0;
2869     for (i = 0; i < nCNTL_pre; ++i) {
2870       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2871     }
2872   } else *val = mumps->id.CNTL(icntl);
2873   PetscFunctionReturn(PETSC_SUCCESS);
2874 }
2875 
2876 /*@
2877   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2878 
2879   Logically Collective
2880 
2881   Input Parameters:
2882 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2883 . icntl - index of MUMPS parameter array CNTL()
2884 - val   - value of MUMPS CNTL(icntl)
2885 
2886   Options Database Key:
2887 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2888 
2889   Level: beginner
2890 
2891   References:
2892 .  * - MUMPS Users' Guide
2893 
2894 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2895 @*/
2896 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2897 {
2898   PetscFunctionBegin;
2899   PetscValidType(F, 1);
2900   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2901   PetscValidLogicalCollectiveInt(F, icntl, 2);
2902   PetscValidLogicalCollectiveReal(F, val, 3);
2903   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2904   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
2907 
2908 /*@
2909   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2910 
2911   Logically Collective
2912 
2913   Input Parameters:
2914 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2915 - icntl - index of MUMPS parameter array CNTL()
2916 
2917   Output Parameter:
2918 . val - value of MUMPS CNTL(icntl)
2919 
2920   Level: beginner
2921 
2922   References:
2923 .  * - MUMPS Users' Guide
2924 
2925 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2926 @*/
2927 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2928 {
2929   PetscFunctionBegin;
2930   PetscValidType(F, 1);
2931   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2932   PetscValidLogicalCollectiveInt(F, icntl, 2);
2933   PetscAssertPointer(val, 3);
2934   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2935   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2936   PetscFunctionReturn(PETSC_SUCCESS);
2937 }
2938 
2939 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2940 {
2941   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2942 
2943   PetscFunctionBegin;
2944   *info = mumps->id.INFO(icntl);
2945   PetscFunctionReturn(PETSC_SUCCESS);
2946 }
2947 
2948 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2949 {
2950   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2951 
2952   PetscFunctionBegin;
2953   *infog = mumps->id.INFOG(icntl);
2954   PetscFunctionReturn(PETSC_SUCCESS);
2955 }
2956 
2957 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2958 {
2959   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2960 
2961   PetscFunctionBegin;
2962   *rinfo = mumps->id.RINFO(icntl);
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2967 {
2968   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2969 
2970   PetscFunctionBegin;
2971   *rinfog = mumps->id.RINFOG(icntl);
2972   PetscFunctionReturn(PETSC_SUCCESS);
2973 }
2974 
2975 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2976 {
2977   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2978 
2979   PetscFunctionBegin;
2980   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2981   *size  = 0;
2982   *array = NULL;
2983   if (!mumps->myid) {
2984     *size = mumps->id.INFOG(28);
2985     PetscCall(PetscMalloc1(*size, array));
2986     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2987   }
2988   PetscFunctionReturn(PETSC_SUCCESS);
2989 }
2990 
2991 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2992 {
2993   Mat          Bt = NULL, Btseq = NULL;
2994   PetscBool    flg;
2995   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2996   PetscScalar *aa;
2997   PetscInt     spnr, *ia, *ja, M, nrhs;
2998 
2999   PetscFunctionBegin;
3000   PetscAssertPointer(spRHS, 2);
3001   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3002   if (flg) {
3003     PetscCall(MatTransposeGetMat(spRHS, &Bt));
3004   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
3005 
3006   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3007 
3008   if (mumps->petsc_size > 1) {
3009     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3010     Btseq         = b->A;
3011   } else {
3012     Btseq = Bt;
3013   }
3014 
3015   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3016   mumps->id.nrhs = nrhs;
3017   mumps->id.lrhs = M;
3018   mumps->id.rhs  = NULL;
3019 
3020   if (!mumps->myid) {
3021     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3022     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3023     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3024     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3025     mumps->id.rhs_sparse = (MumpsScalar *)aa;
3026   } else {
3027     mumps->id.irhs_ptr    = NULL;
3028     mumps->id.irhs_sparse = NULL;
3029     mumps->id.nz_rhs      = 0;
3030     mumps->id.rhs_sparse  = NULL;
3031   }
3032   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3033   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3034 
3035   /* solve phase */
3036   mumps->id.job = JOB_SOLVE;
3037   PetscMUMPS_c(mumps);
3038   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3039 
3040   if (!mumps->myid) {
3041     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3042     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3043     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3044   }
3045   PetscFunctionReturn(PETSC_SUCCESS);
3046 }
3047 
3048 /*@
3049   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
3050 
3051   Logically Collective
3052 
3053   Input Parameter:
3054 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3055 
3056   Output Parameter:
3057 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3058 
3059   Level: beginner
3060 
3061   References:
3062 .  * - MUMPS Users' Guide
3063 
3064 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3065 @*/
3066 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3067 {
3068   PetscFunctionBegin;
3069   PetscValidType(F, 1);
3070   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3071   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3072   PetscFunctionReturn(PETSC_SUCCESS);
3073 }
3074 
3075 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3076 {
3077   Mat spRHS;
3078 
3079   PetscFunctionBegin;
3080   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3081   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3082   PetscCall(MatDestroy(&spRHS));
3083   PetscFunctionReturn(PETSC_SUCCESS);
3084 }
3085 
3086 /*@
3087   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
3088 
3089   Logically Collective
3090 
3091   Input Parameter:
3092 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3093 
3094   Output Parameter:
3095 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3096 
3097   Level: beginner
3098 
3099   References:
3100 .  * - MUMPS Users' Guide
3101 
3102 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3103 @*/
3104 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3105 {
3106   PetscBool flg;
3107 
3108   PetscFunctionBegin;
3109   PetscValidType(F, 1);
3110   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3111   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3112   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3113 
3114   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3115   PetscFunctionReturn(PETSC_SUCCESS);
3116 }
3117 
3118 /*@
3119   MatMumpsGetInfo - Get MUMPS parameter INFO()
3120 
3121   Logically Collective
3122 
3123   Input Parameters:
3124 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3125 - icntl - index of MUMPS parameter array INFO()
3126 
3127   Output Parameter:
3128 . ival - value of MUMPS INFO(icntl)
3129 
3130   Level: beginner
3131 
3132   References:
3133 .  * - MUMPS Users' Guide
3134 
3135 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3136 @*/
3137 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3138 {
3139   PetscFunctionBegin;
3140   PetscValidType(F, 1);
3141   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3142   PetscAssertPointer(ival, 3);
3143   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
3147 /*@
3148   MatMumpsGetInfog - Get MUMPS parameter INFOG()
3149 
3150   Logically Collective
3151 
3152   Input Parameters:
3153 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3154 - icntl - index of MUMPS parameter array INFOG()
3155 
3156   Output Parameter:
3157 . ival - value of MUMPS INFOG(icntl)
3158 
3159   Level: beginner
3160 
3161   References:
3162 .  * - MUMPS Users' Guide
3163 
3164 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3165 @*/
3166 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3167 {
3168   PetscFunctionBegin;
3169   PetscValidType(F, 1);
3170   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3171   PetscAssertPointer(ival, 3);
3172   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3173   PetscFunctionReturn(PETSC_SUCCESS);
3174 }
3175 
3176 /*@
3177   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
3178 
3179   Logically Collective
3180 
3181   Input Parameters:
3182 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3183 - icntl - index of MUMPS parameter array RINFO()
3184 
3185   Output Parameter:
3186 . val - value of MUMPS RINFO(icntl)
3187 
3188   Level: beginner
3189 
3190   References:
3191 .  * - MUMPS Users' Guide
3192 
3193 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3194 @*/
3195 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3196 {
3197   PetscFunctionBegin;
3198   PetscValidType(F, 1);
3199   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3200   PetscAssertPointer(val, 3);
3201   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3202   PetscFunctionReturn(PETSC_SUCCESS);
3203 }
3204 
3205 /*@
3206   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
3207 
3208   Logically Collective
3209 
3210   Input Parameters:
3211 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3212 - icntl - index of MUMPS parameter array RINFOG()
3213 
3214   Output Parameter:
3215 . val - value of MUMPS RINFOG(icntl)
3216 
3217   Level: beginner
3218 
3219   References:
3220 .  * - MUMPS Users' Guide
3221 
3222 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3223 @*/
3224 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3225 {
3226   PetscFunctionBegin;
3227   PetscValidType(F, 1);
3228   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3229   PetscAssertPointer(val, 3);
3230   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3231   PetscFunctionReturn(PETSC_SUCCESS);
3232 }
3233 
3234 /*@
3235   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
3236 
3237   Logically Collective
3238 
3239   Input Parameter:
3240 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3241 
3242   Output Parameters:
3243 + size  - local size of the array. The size of the array is non-zero only on the host.
3244 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3245            for freeing this array.
3246 
3247   Level: beginner
3248 
3249   References:
3250 .  * - MUMPS Users' Guide
3251 
3252 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3253 @*/
3254 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3255 {
3256   PetscFunctionBegin;
3257   PetscValidType(F, 1);
3258   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3259   PetscAssertPointer(size, 2);
3260   PetscAssertPointer(array, 3);
3261   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3262   PetscFunctionReturn(PETSC_SUCCESS);
3263 }
3264 
3265 /*MC
3266   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3267   distributed and sequential matrices via the external package MUMPS.
3268 
3269   Works with `MATAIJ` and `MATSBAIJ` matrices
3270 
3271   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3272 
3273   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3274   See details below.
3275 
3276   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3277 
3278   Options Database Keys:
3279 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3280 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3281 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
3282 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
3283 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3284 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3285                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3286 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3287 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3288 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3289 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3290 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3291 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3292 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3293 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3294 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3295 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3296 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3297 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3298 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3299 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
3300 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
3301 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3302 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3303 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3304 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3305 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3306 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3307 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3308 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3309 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
3310 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
3311 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3312 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3313 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3314 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3315 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3316                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3317 
3318   Level: beginner
3319 
3320     Notes:
3321     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3322     error if the matrix is Hermitian.
3323 
3324     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3325     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3326 
3327     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3328     the failure with
3329 .vb
3330           KSPGetPC(ksp,&pc);
3331           PCFactorGetMatrix(pc,&mat);
3332           MatMumpsGetInfo(mat,....);
3333           MatMumpsGetInfog(mat,....); etc.
3334 .ve
3335     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3336 
3337     MUMPS provides 64-bit integer support in two build modes:
3338       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3339       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3340 
3341       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3342       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3343       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3344       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3345 
3346     With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3347 
3348   Two modes to run MUMPS/PETSc with OpenMP
3349 .vb
3350      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3351      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3352 .ve
3353 
3354 .vb
3355      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3356     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3357 .ve
3358 
3359    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3360    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3361    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3362    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3363    (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided).
3364 
3365    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3366    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3367    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3368    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3369    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3370    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3371    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3372    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3373    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3374    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3375    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3376    examine the mapping result.
3377 
3378    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3379    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3380    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3381 
3382    References:
3383 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3384 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3385 
3386 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3387 M*/
3388 
3389 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3390 {
3391   PetscFunctionBegin;
3392   *type = MATSOLVERMUMPS;
3393   PetscFunctionReturn(PETSC_SUCCESS);
3394 }
3395 
3396 /* MatGetFactor for Seq and MPI AIJ matrices */
3397 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3398 {
3399   Mat         B;
3400   Mat_MUMPS  *mumps;
3401   PetscBool   isSeqAIJ, isDiag, isDense;
3402   PetscMPIInt size;
3403 
3404   PetscFunctionBegin;
3405 #if defined(PETSC_USE_COMPLEX)
3406   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3407     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3408     *F = NULL;
3409     PetscFunctionReturn(PETSC_SUCCESS);
3410   }
3411 #endif
3412   /* Create the factorization matrix */
3413   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3414   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3415   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3416   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3417   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3418   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3419   PetscCall(MatSetUp(B));
3420 
3421   PetscCall(PetscNew(&mumps));
3422 
3423   B->ops->view    = MatView_MUMPS;
3424   B->ops->getinfo = MatGetInfo_MUMPS;
3425 
3426   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3427   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3428   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3429   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3430   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3431   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3432   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3433   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3434   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3435   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3436   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3437   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3438   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3439   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3440 
3441   if (ftype == MAT_FACTOR_LU) {
3442     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3443     B->factortype            = MAT_FACTOR_LU;
3444     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3445     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3446     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3447     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3448     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3449     mumps->sym = 0;
3450   } else {
3451     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3452     B->factortype                  = MAT_FACTOR_CHOLESKY;
3453     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3454     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3455     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3456     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3457     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3458 #if defined(PETSC_USE_COMPLEX)
3459     mumps->sym = 2;
3460 #else
3461     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3462     else mumps->sym = 2;
3463 #endif
3464   }
3465 
3466   /* set solvertype */
3467   PetscCall(PetscFree(B->solvertype));
3468   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3469   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3470   if (size == 1) {
3471     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3472     B->canuseordering = PETSC_TRUE;
3473   }
3474   B->ops->destroy = MatDestroy_MUMPS;
3475   B->data         = (void *)mumps;
3476 
3477   *F               = B;
3478   mumps->id.job    = JOB_NULL;
3479   mumps->ICNTL_pre = NULL;
3480   mumps->CNTL_pre  = NULL;
3481   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3482   PetscFunctionReturn(PETSC_SUCCESS);
3483 }
3484 
3485 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3486 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3487 {
3488   Mat         B;
3489   Mat_MUMPS  *mumps;
3490   PetscBool   isSeqSBAIJ;
3491   PetscMPIInt size;
3492 
3493   PetscFunctionBegin;
3494 #if defined(PETSC_USE_COMPLEX)
3495   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3496     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3497     *F = NULL;
3498     PetscFunctionReturn(PETSC_SUCCESS);
3499   }
3500 #endif
3501   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3502   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3503   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3504   PetscCall(MatSetUp(B));
3505 
3506   PetscCall(PetscNew(&mumps));
3507   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3508   if (isSeqSBAIJ) {
3509     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3510   } else {
3511     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3512   }
3513 
3514   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3515   B->ops->view                   = MatView_MUMPS;
3516   B->ops->getinfo                = MatGetInfo_MUMPS;
3517 
3518   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3519   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3520   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3521   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3522   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3523   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3524   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3525   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3526   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3527   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3528   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3529   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3530   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3531   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3532 
3533   B->factortype = MAT_FACTOR_CHOLESKY;
3534 #if defined(PETSC_USE_COMPLEX)
3535   mumps->sym = 2;
3536 #else
3537   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3538   else mumps->sym = 2;
3539 #endif
3540 
3541   /* set solvertype */
3542   PetscCall(PetscFree(B->solvertype));
3543   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3544   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3545   if (size == 1) {
3546     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3547     B->canuseordering = PETSC_TRUE;
3548   }
3549   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3550   B->ops->destroy = MatDestroy_MUMPS;
3551   B->data         = (void *)mumps;
3552 
3553   *F               = B;
3554   mumps->id.job    = JOB_NULL;
3555   mumps->ICNTL_pre = NULL;
3556   mumps->CNTL_pre  = NULL;
3557   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3558   PetscFunctionReturn(PETSC_SUCCESS);
3559 }
3560 
3561 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3562 {
3563   Mat         B;
3564   Mat_MUMPS  *mumps;
3565   PetscBool   isSeqBAIJ;
3566   PetscMPIInt size;
3567 
3568   PetscFunctionBegin;
3569   /* Create the factorization matrix */
3570   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3571   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3572   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3573   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3574   PetscCall(MatSetUp(B));
3575 
3576   PetscCall(PetscNew(&mumps));
3577   if (ftype == MAT_FACTOR_LU) {
3578     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3579     B->factortype            = MAT_FACTOR_LU;
3580     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3581     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3582     mumps->sym = 0;
3583     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3584   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3585 
3586   B->ops->view    = MatView_MUMPS;
3587   B->ops->getinfo = MatGetInfo_MUMPS;
3588 
3589   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3590   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3591   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3592   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3593   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3594   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3595   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3596   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3597   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3598   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3599   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3600   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3601   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3602   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3603 
3604   /* set solvertype */
3605   PetscCall(PetscFree(B->solvertype));
3606   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3607   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3608   if (size == 1) {
3609     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3610     B->canuseordering = PETSC_TRUE;
3611   }
3612   B->ops->destroy = MatDestroy_MUMPS;
3613   B->data         = (void *)mumps;
3614 
3615   *F               = B;
3616   mumps->id.job    = JOB_NULL;
3617   mumps->ICNTL_pre = NULL;
3618   mumps->CNTL_pre  = NULL;
3619   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3620   PetscFunctionReturn(PETSC_SUCCESS);
3621 }
3622 
3623 /* MatGetFactor for Seq and MPI SELL matrices */
3624 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3625 {
3626   Mat         B;
3627   Mat_MUMPS  *mumps;
3628   PetscBool   isSeqSELL;
3629   PetscMPIInt size;
3630 
3631   PetscFunctionBegin;
3632   /* Create the factorization matrix */
3633   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3634   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3635   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3636   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3637   PetscCall(MatSetUp(B));
3638 
3639   PetscCall(PetscNew(&mumps));
3640 
3641   B->ops->view    = MatView_MUMPS;
3642   B->ops->getinfo = MatGetInfo_MUMPS;
3643 
3644   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3645   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3646   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3647   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3648   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3649   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3650   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3651   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3652   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3653   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3654   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3655   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3656 
3657   if (ftype == MAT_FACTOR_LU) {
3658     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3659     B->factortype            = MAT_FACTOR_LU;
3660     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3661     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3662     mumps->sym = 0;
3663     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3664   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3665 
3666   /* set solvertype */
3667   PetscCall(PetscFree(B->solvertype));
3668   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3669   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3670   if (size == 1) {
3671     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3672     B->canuseordering = PETSC_TRUE;
3673   }
3674   B->ops->destroy = MatDestroy_MUMPS;
3675   B->data         = (void *)mumps;
3676 
3677   *F               = B;
3678   mumps->id.job    = JOB_NULL;
3679   mumps->ICNTL_pre = NULL;
3680   mumps->CNTL_pre  = NULL;
3681   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3682   PetscFunctionReturn(PETSC_SUCCESS);
3683 }
3684 
3685 /* MatGetFactor for MATNEST matrices */
3686 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3687 {
3688   Mat         B, **mats;
3689   Mat_MUMPS  *mumps;
3690   PetscInt    nr, nc;
3691   PetscMPIInt size;
3692   PetscBool   flg = PETSC_TRUE;
3693 
3694   PetscFunctionBegin;
3695 #if defined(PETSC_USE_COMPLEX)
3696   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3697     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3698     *F = NULL;
3699     PetscFunctionReturn(PETSC_SUCCESS);
3700   }
3701 #endif
3702 
3703   /* Return if some condition is not satisfied */
3704   *F = NULL;
3705   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3706   if (ftype == MAT_FACTOR_CHOLESKY) {
3707     IS       *rows, *cols;
3708     PetscInt *m, *M;
3709 
3710     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3711     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3712     PetscCall(MatNestGetISs(A, rows, cols));
3713     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3714     if (!flg) {
3715       PetscCall(PetscFree2(rows, cols));
3716       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3717       PetscFunctionReturn(PETSC_SUCCESS);
3718     }
3719     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3720     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3721     for (PetscInt r = 0; flg && r < nr; r++)
3722       for (PetscInt k = r + 1; flg && k < nr; k++)
3723         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3724     PetscCall(PetscFree2(m, M));
3725     PetscCall(PetscFree2(rows, cols));
3726     if (!flg) {
3727       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3728       PetscFunctionReturn(PETSC_SUCCESS);
3729     }
3730   }
3731 
3732   for (PetscInt r = 0; r < nr; r++) {
3733     for (PetscInt c = 0; c < nc; c++) {
3734       Mat       sub = mats[r][c];
3735       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;
3736 
3737       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3738       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3739       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3740       else {
3741         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3742         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3743       }
3744       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3745       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3746       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3747       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3748       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3749       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3750       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3751       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3752       if (ftype == MAT_FACTOR_CHOLESKY) {
3753         if (r == c) {
3754           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
3755             PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3756             flg = PETSC_FALSE;
3757           }
3758         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3759           PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3760           flg = PETSC_FALSE;
3761         }
3762       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3763         PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3764         flg = PETSC_FALSE;
3765       }
3766     }
3767   }
3768   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3769 
3770   /* Create the factorization matrix */
3771   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3772   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3773   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3774   PetscCall(MatSetUp(B));
3775 
3776   PetscCall(PetscNew(&mumps));
3777 
3778   B->ops->view    = MatView_MUMPS;
3779   B->ops->getinfo = MatGetInfo_MUMPS;
3780 
3781   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3782   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3783   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3784   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3785   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3786   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3787   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3788   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3789   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3790   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3791   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3792   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3793   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3794   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3795 
3796   if (ftype == MAT_FACTOR_LU) {
3797     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3798     B->factortype            = MAT_FACTOR_LU;
3799     mumps->sym               = 0;
3800   } else {
3801     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3802     B->factortype                  = MAT_FACTOR_CHOLESKY;
3803 #if defined(PETSC_USE_COMPLEX)
3804     mumps->sym = 2;
3805 #else
3806     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3807     else mumps->sym = 2;
3808 #endif
3809   }
3810   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3811   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3812 
3813   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3814   if (size == 1) {
3815     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3816     B->canuseordering = PETSC_TRUE;
3817   }
3818 
3819   /* set solvertype */
3820   PetscCall(PetscFree(B->solvertype));
3821   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3822   B->ops->destroy = MatDestroy_MUMPS;
3823   B->data         = (void *)mumps;
3824 
3825   *F               = B;
3826   mumps->id.job    = JOB_NULL;
3827   mumps->ICNTL_pre = NULL;
3828   mumps->CNTL_pre  = NULL;
3829   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3830   PetscFunctionReturn(PETSC_SUCCESS);
3831 }
3832 
3833 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3834 {
3835   PetscFunctionBegin;
3836   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3837   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3838   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3839   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3840   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3841   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3842   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3843   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3844   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3845   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3846   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3847   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3848   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3849   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3850   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3851   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3852   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3853   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3854   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3855   PetscFunctionReturn(PETSC_SUCCESS);
3856 }
3857