xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 5307ee71759cdcd710de0c6001a0ecfe18451f2c)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14   #if defined(PETSC_USE_REAL_SINGLE)
15     #include <cmumps_c.h>
16   #else
17     #include <zmumps_c.h>
18   #endif
19 #else
20   #if defined(PETSC_USE_REAL_SINGLE)
21     #include <smumps_c.h>
22   #else
23     #include <dmumps_c.h>
24   #endif
25 #endif
26 EXTERN_C_END
27 #define JOB_INIT         -1
28 #define JOB_NULL         0
29 #define JOB_FACTSYMBOLIC 1
30 #define JOB_FACTNUMERIC  2
31 #define JOB_SOLVE        3
32 #define JOB_END          -2
33 
34 /* calls to MUMPS */
35 #if defined(PETSC_USE_COMPLEX)
36   #if defined(PETSC_USE_REAL_SINGLE)
37     #define MUMPS_c cmumps_c
38   #else
39     #define MUMPS_c zmumps_c
40   #endif
41 #else
42   #if defined(PETSC_USE_REAL_SINGLE)
43     #define MUMPS_c smumps_c
44   #else
45     #define MUMPS_c dmumps_c
46   #endif
47 #endif
48 
49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
50    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
51    naming convention in PetscMPIInt, PetscBLASInt etc.
52 */
53 typedef MUMPS_INT PetscMUMPSInt;
54 
55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
56   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
57     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
58   #endif
59 #else
60   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
61     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
62   #endif
63 #endif
64 
65 #define MPIU_MUMPSINT       MPI_INT
66 #define PETSC_MUMPS_INT_MAX 2147483647
67 #define PETSC_MUMPS_INT_MIN -2147483648
68 
69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
70 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
71 {
72   PetscFunctionBegin;
73 #if PetscDefined(USE_64BIT_INDICES)
74   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
75 #endif
76   *b = (PetscMUMPSInt)a;
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
80 /* Put these utility routines here since they are only used in this file */
81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
82 {
83   PetscInt  myval;
84   PetscBool myset;
85 
86   PetscFunctionBegin;
87   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
88   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
89   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
90   if (set) *set = myset;
91   PetscFunctionReturn(PETSC_SUCCESS);
92 }
93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
94 
95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
96 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
97   #define PetscMUMPS_c(mumps) \
98     do { \
99       if (mumps->use_petsc_omp_support) { \
100         if (mumps->is_omp_master) { \
101           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
102           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
103           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
104           PetscCall(PetscFPTrapPop()); \
105           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
106         } \
107         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
108         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
109          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
110          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
111          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
112       */ \
113         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
116         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
117       } else { \
118         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
119         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
120         PetscCall(PetscFPTrapPop()); \
121       } \
122     } while (0)
123 #else
124   #define PetscMUMPS_c(mumps) \
125     do { \
126       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
127       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
128       PetscCall(PetscFPTrapPop()); \
129     } while (0)
130 #endif
131 
132 /* declare MumpsScalar */
133 #if defined(PETSC_USE_COMPLEX)
134   #if defined(PETSC_USE_REAL_SINGLE)
135     #define MumpsScalar mumps_complex
136   #else
137     #define MumpsScalar mumps_double_complex
138   #endif
139 #else
140   #define MumpsScalar PetscScalar
141 #endif
142 
143 /* macros s.t. indices match MUMPS documentation */
144 #define ICNTL(I)  icntl[(I) - 1]
145 #define CNTL(I)   cntl[(I) - 1]
146 #define INFOG(I)  infog[(I) - 1]
147 #define INFO(I)   info[(I) - 1]
148 #define RINFOG(I) rinfog[(I) - 1]
149 #define RINFO(I)  rinfo[(I) - 1]
150 
151 typedef struct Mat_MUMPS Mat_MUMPS;
152 struct Mat_MUMPS {
153 #if defined(PETSC_USE_COMPLEX)
154   #if defined(PETSC_USE_REAL_SINGLE)
155   CMUMPS_STRUC_C id;
156   #else
157   ZMUMPS_STRUC_C id;
158   #endif
159 #else
160   #if defined(PETSC_USE_REAL_SINGLE)
161   SMUMPS_STRUC_C id;
162   #else
163   DMUMPS_STRUC_C id;
164   #endif
165 #endif
166 
167   MatStructure   matstruc;
168   PetscMPIInt    myid, petsc_size;
169   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
170   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
171   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
172   PetscMUMPSInt  sym;
173   MPI_Comm       mumps_comm;
174   PetscMUMPSInt *ICNTL_pre;
175   PetscReal     *CNTL_pre;
176   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
177   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
178   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
179   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
180 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
181   PetscInt    *rhs_nrow, max_nrhs;
182   PetscMPIInt *rhs_recvcounts, *rhs_disps;
183   PetscScalar *rhs_loc, *rhs_recvbuf;
184 #endif
185   Vec            b_seq, x_seq;
186   PetscInt       ninfo, *info; /* which INFO to display */
187   PetscInt       sizeredrhs;
188   PetscScalar   *schur_sol;
189   PetscInt       schur_sizesol;
190   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
191   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
192   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
193 
194   /* Support for MATNEST */
195   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
196   PetscCount  *nest_vals_start;
197   PetscScalar *nest_vals;
198 
199   /* stuff used by petsc/mumps OpenMP support*/
200   PetscBool    use_petsc_omp_support;
201   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
202   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
203   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
204   PetscMPIInt  tag, omp_comm_size;
205   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
206   MPI_Request *reqs;
207 };
208 
209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
210    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
211  */
212 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
213 {
214   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
215 
216   PetscFunctionBegin;
217 #if defined(PETSC_USE_64BIT_INDICES)
218   {
219     PetscInt i;
220     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
221       PetscCall(PetscFree(mumps->ia_alloc));
222       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
223       mumps->cur_ilen = nrow + 1;
224     }
225     if (nnz > mumps->cur_jlen) {
226       PetscCall(PetscFree(mumps->ja_alloc));
227       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
228       mumps->cur_jlen = nnz;
229     }
230     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
231     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
232     *ia_mumps = mumps->ia_alloc;
233     *ja_mumps = mumps->ja_alloc;
234   }
235 #else
236   *ia_mumps = ia;
237   *ja_mumps = ja;
238 #endif
239   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
244 {
245   PetscFunctionBegin;
246   PetscCall(PetscFree(mumps->id.listvar_schur));
247   PetscCall(PetscFree(mumps->id.redrhs));
248   PetscCall(PetscFree(mumps->schur_sol));
249   mumps->id.size_schur = 0;
250   mumps->id.schur_lld  = 0;
251   mumps->id.ICNTL(19)  = 0;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 /* solve with rhs in mumps->id.redrhs and return in the same location */
256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
257 {
258   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
259   Mat                  S, B, X;
260   MatFactorSchurStatus schurstatus;
261   PetscInt             sizesol;
262 
263   PetscFunctionBegin;
264   PetscCall(MatFactorFactorizeSchurComplement(F));
265   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
266   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
267   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
269   PetscCall(MatBindToCPU(B, S->boundtocpu));
270 #endif
271   switch (schurstatus) {
272   case MAT_FACTOR_SCHUR_FACTORED:
273     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
274     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
276     PetscCall(MatBindToCPU(X, S->boundtocpu));
277 #endif
278     if (!mumps->id.ICNTL(9)) { /* transpose solve */
279       PetscCall(MatMatSolveTranspose(S, B, X));
280     } else {
281       PetscCall(MatMatSolve(S, B, X));
282     }
283     break;
284   case MAT_FACTOR_SCHUR_INVERTED:
285     sizesol = mumps->id.nrhs * mumps->id.size_schur;
286     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
287       PetscCall(PetscFree(mumps->schur_sol));
288       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
289       mumps->schur_sizesol = sizesol;
290     }
291     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
292     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
294     PetscCall(MatBindToCPU(X, S->boundtocpu));
295 #endif
296     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
297     if (!mumps->id.ICNTL(9)) { /* transpose solve */
298       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
299     } else {
300       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
301     }
302     PetscCall(MatProductSetFromOptions(X));
303     PetscCall(MatProductSymbolic(X));
304     PetscCall(MatProductNumeric(X));
305 
306     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
307     break;
308   default:
309     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
310   }
311   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
312   PetscCall(MatDestroy(&B));
313   PetscCall(MatDestroy(&X));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
318 {
319   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
320 
321   PetscFunctionBegin;
322   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
323     PetscFunctionReturn(PETSC_SUCCESS);
324   }
325   if (!expansion) { /* prepare for the condensation step */
326     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
327     /* allocate MUMPS internal array to store reduced right-hand sides */
328     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
329       PetscCall(PetscFree(mumps->id.redrhs));
330       mumps->id.lredrhs = mumps->id.size_schur;
331       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
332       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
333     }
334   } else { /* prepare for the expansion step */
335     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
336     PetscCall(MatMumpsSolveSchur_Private(F));
337     mumps->id.ICNTL(26) = 2; /* expansion phase */
338     PetscMUMPS_c(mumps);
339     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
340     /* restore defaults */
341     mumps->id.ICNTL(26) = -1;
342     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
343     if (mumps->id.nrhs > 1) {
344       PetscCall(PetscFree(mumps->id.redrhs));
345       mumps->id.lredrhs = 0;
346       mumps->sizeredrhs = 0;
347     }
348   }
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
351 
352 /*
353   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
354 
355   input:
356     A       - matrix in aij,baij or sbaij format
357     shift   - 0: C style output triple; 1: Fortran style output triple.
358     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
359               MAT_REUSE_MATRIX:   only the values in v array are updated
360   output:
361     nnz     - dim of r, c, and v (number of local nonzero entries of A)
362     r, c, v - row and col index, matrix values (matrix triples)
363 
364   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
365   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
366   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
367 
368  */
369 
370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
371 {
372   const PetscScalar *av;
373   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
374   PetscCount         nz, rnz, k;
375   PetscMUMPSInt     *row, *col;
376   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
377 
378   PetscFunctionBegin;
379   PetscCall(MatSeqAIJGetArrayRead(A, &av));
380   if (reuse == MAT_INITIAL_MATRIX) {
381     nz = aa->nz;
382     ai = aa->i;
383     aj = aa->j;
384     PetscCall(PetscMalloc2(nz, &row, nz, &col));
385     for (PetscCount i = k = 0; i < M; i++) {
386       rnz = ai[i + 1] - ai[i];
387       ajj = aj + ai[i];
388       for (PetscCount j = 0; j < rnz; j++) {
389         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
390         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
391         k++;
392       }
393     }
394     mumps->val = (PetscScalar *)av;
395     mumps->irn = row;
396     mumps->jcn = col;
397     mumps->nnz = nz;
398   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
399   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
400   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
401   PetscFunctionReturn(PETSC_SUCCESS);
402 }
403 
404 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
405 {
406   PetscCount     nz, i, j, k, r;
407   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
408   PetscMUMPSInt *row, *col;
409 
410   PetscFunctionBegin;
411   nz = a->sliidx[a->totalslices];
412   if (reuse == MAT_INITIAL_MATRIX) {
413     PetscCall(PetscMalloc2(nz, &row, nz, &col));
414     for (i = k = 0; i < a->totalslices; i++) {
415       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
416     }
417     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
418     mumps->irn = row;
419     mumps->jcn = col;
420     mumps->nnz = nz;
421     mumps->val = a->val;
422   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
423   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
427 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
428 {
429   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
430   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
431   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
432   PetscInt        bs;
433   PetscMUMPSInt  *row, *col;
434 
435   PetscFunctionBegin;
436   if (reuse == MAT_INITIAL_MATRIX) {
437     PetscCall(MatGetBlockSize(A, &bs));
438     M  = A->rmap->N / bs;
439     ai = aa->i;
440     aj = aa->j;
441     PetscCall(PetscMalloc2(nz, &row, nz, &col));
442     for (i = 0; i < M; i++) {
443       ajj = aj + ai[i];
444       rnz = ai[i + 1] - ai[i];
445       for (k = 0; k < rnz; k++) {
446         for (j = 0; j < bs; j++) {
447           for (m = 0; m < bs; m++) {
448             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
449             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
450             idx++;
451           }
452         }
453       }
454     }
455     mumps->irn = row;
456     mumps->jcn = col;
457     mumps->nnz = nz;
458     mumps->val = aa->a;
459   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
460   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
465 {
466   const PetscInt *ai, *aj, *ajj;
467   PetscInt        bs;
468   PetscCount      nz, rnz, i, j, k, m;
469   PetscMUMPSInt  *row, *col;
470   PetscScalar    *val;
471   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
472   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
473 #if defined(PETSC_USE_COMPLEX)
474   PetscBool isset, hermitian;
475 #endif
476 
477   PetscFunctionBegin;
478 #if defined(PETSC_USE_COMPLEX)
479   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
480   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
481 #endif
482   ai = aa->i;
483   aj = aa->j;
484   PetscCall(MatGetBlockSize(A, &bs));
485   if (reuse == MAT_INITIAL_MATRIX) {
486     const PetscCount alloc_size = aa->nz * bs2;
487 
488     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
489     if (bs > 1) {
490       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
491       mumps->val = mumps->val_alloc;
492     } else {
493       mumps->val = aa->a;
494     }
495     mumps->irn = row;
496     mumps->jcn = col;
497   } else {
498     row = mumps->irn;
499     col = mumps->jcn;
500   }
501   val = mumps->val;
502 
503   nz = 0;
504   if (bs > 1) {
505     for (i = 0; i < mbs; i++) {
506       rnz = ai[i + 1] - ai[i];
507       ajj = aj + ai[i];
508       for (j = 0; j < rnz; j++) {
509         for (k = 0; k < bs; k++) {
510           for (m = 0; m < bs; m++) {
511             if (ajj[j] > i || k >= m) {
512               if (reuse == MAT_INITIAL_MATRIX) {
513                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
514                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
515               }
516               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
517             }
518           }
519         }
520       }
521     }
522   } else if (reuse == MAT_INITIAL_MATRIX) {
523     for (i = 0; i < mbs; i++) {
524       rnz = ai[i + 1] - ai[i];
525       ajj = aj + ai[i];
526       for (j = 0; j < rnz; j++) {
527         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
528         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
529         nz++;
530       }
531     }
532     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
533   } else if (mumps->nest_vals)
534     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
535   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
536   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
537   PetscFunctionReturn(PETSC_SUCCESS);
538 }
539 
540 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
541 {
542   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
543   PetscCount         nz, rnz, i, j;
544   const PetscScalar *av, *v1;
545   PetscScalar       *val;
546   PetscMUMPSInt     *row, *col;
547   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
548   PetscBool          missing;
549 #if defined(PETSC_USE_COMPLEX)
550   PetscBool hermitian, isset;
551 #endif
552 
553   PetscFunctionBegin;
554 #if defined(PETSC_USE_COMPLEX)
555   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
556   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
557 #endif
558   PetscCall(MatSeqAIJGetArrayRead(A, &av));
559   ai    = aa->i;
560   aj    = aa->j;
561   adiag = aa->diag;
562   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
563   if (reuse == MAT_INITIAL_MATRIX) {
564     /* count nz in the upper triangular part of A */
565     nz = 0;
566     if (missing) {
567       for (i = 0; i < M; i++) {
568         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
569           for (j = ai[i]; j < ai[i + 1]; j++) {
570             if (aj[j] < i) continue;
571             nz++;
572           }
573         } else {
574           nz += ai[i + 1] - adiag[i];
575         }
576       }
577     } else {
578       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
579     }
580     PetscCall(PetscMalloc2(nz, &row, nz, &col));
581     PetscCall(PetscMalloc1(nz, &val));
582     mumps->nnz = nz;
583     mumps->irn = row;
584     mumps->jcn = col;
585     mumps->val = mumps->val_alloc = val;
586 
587     nz = 0;
588     if (missing) {
589       for (i = 0; i < M; i++) {
590         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
591           for (j = ai[i]; j < ai[i + 1]; j++) {
592             if (aj[j] < i) continue;
593             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
594             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
595             val[nz] = av[j];
596             nz++;
597           }
598         } else {
599           rnz = ai[i + 1] - adiag[i];
600           ajj = aj + adiag[i];
601           v1  = av + adiag[i];
602           for (j = 0; j < rnz; j++) {
603             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
604             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
605             val[nz++] = v1[j];
606           }
607         }
608       }
609     } else {
610       for (i = 0; i < M; i++) {
611         rnz = ai[i + 1] - adiag[i];
612         ajj = aj + adiag[i];
613         v1  = av + adiag[i];
614         for (j = 0; j < rnz; j++) {
615           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
616           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
617           val[nz++] = v1[j];
618         }
619       }
620     }
621   } else {
622     nz  = 0;
623     val = mumps->val;
624     if (missing) {
625       for (i = 0; i < M; i++) {
626         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
627           for (j = ai[i]; j < ai[i + 1]; j++) {
628             if (aj[j] < i) continue;
629             val[nz++] = av[j];
630           }
631         } else {
632           rnz = ai[i + 1] - adiag[i];
633           v1  = av + adiag[i];
634           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
635         }
636       }
637     } else {
638       for (i = 0; i < M; i++) {
639         rnz = ai[i + 1] - adiag[i];
640         v1  = av + adiag[i];
641         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
642       }
643     }
644   }
645   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
650 {
651   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
652   PetscInt           bs;
653   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
654   PetscMUMPSInt     *row, *col;
655   const PetscScalar *av, *bv, *v1, *v2;
656   PetscScalar       *val;
657   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
658   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
659   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
660   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
661 #if defined(PETSC_USE_COMPLEX)
662   PetscBool hermitian, isset;
663 #endif
664 
665   PetscFunctionBegin;
666 #if defined(PETSC_USE_COMPLEX)
667   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
668   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
669 #endif
670   PetscCall(MatGetBlockSize(A, &bs));
671   rstart = A->rmap->rstart;
672   ai     = aa->i;
673   aj     = aa->j;
674   bi     = bb->i;
675   bj     = bb->j;
676   av     = aa->a;
677   bv     = bb->a;
678 
679   garray = mat->garray;
680 
681   if (reuse == MAT_INITIAL_MATRIX) {
682     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
683     PetscCall(PetscMalloc2(nz, &row, nz, &col));
684     PetscCall(PetscMalloc1(nz, &val));
685     /* can not decide the exact mumps->nnz now because of the SBAIJ */
686     mumps->irn = row;
687     mumps->jcn = col;
688     mumps->val = mumps->val_alloc = val;
689   } else {
690     val = mumps->val;
691   }
692 
693   jj   = 0;
694   irow = rstart;
695   for (i = 0; i < mbs; i++) {
696     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
697     countA = ai[i + 1] - ai[i];
698     countB = bi[i + 1] - bi[i];
699     bjj    = bj + bi[i];
700     v1     = av + ai[i] * bs2;
701     v2     = bv + bi[i] * bs2;
702 
703     if (bs > 1) {
704       /* A-part */
705       for (j = 0; j < countA; j++) {
706         for (k = 0; k < bs; k++) {
707           for (m = 0; m < bs; m++) {
708             if (rstart + ajj[j] * bs > irow || k >= m) {
709               if (reuse == MAT_INITIAL_MATRIX) {
710                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
711                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
712               }
713               val[jj++] = v1[j * bs2 + m + k * bs];
714             }
715           }
716         }
717       }
718 
719       /* B-part */
720       for (j = 0; j < countB; j++) {
721         for (k = 0; k < bs; k++) {
722           for (m = 0; m < bs; m++) {
723             if (reuse == MAT_INITIAL_MATRIX) {
724               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
725               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
726             }
727             val[jj++] = v2[j * bs2 + m + k * bs];
728           }
729         }
730       }
731     } else {
732       /* A-part */
733       for (j = 0; j < countA; j++) {
734         if (reuse == MAT_INITIAL_MATRIX) {
735           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
736           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
737         }
738         val[jj++] = v1[j];
739       }
740 
741       /* B-part */
742       for (j = 0; j < countB; j++) {
743         if (reuse == MAT_INITIAL_MATRIX) {
744           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
745           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
746         }
747         val[jj++] = v2[j];
748       }
749     }
750     irow += bs;
751   }
752   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
753   PetscFunctionReturn(PETSC_SUCCESS);
754 }
755 
756 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
757 {
758   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
759   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
760   PetscMUMPSInt     *row, *col;
761   const PetscScalar *av, *bv, *v1, *v2;
762   PetscScalar       *val;
763   Mat                Ad, Ao;
764   Mat_SeqAIJ        *aa;
765   Mat_SeqAIJ        *bb;
766 
767   PetscFunctionBegin;
768   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
769   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
770   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
771 
772   aa = (Mat_SeqAIJ *)Ad->data;
773   bb = (Mat_SeqAIJ *)Ao->data;
774   ai = aa->i;
775   aj = aa->j;
776   bi = bb->i;
777   bj = bb->j;
778 
779   rstart = A->rmap->rstart;
780   cstart = A->cmap->rstart;
781 
782   if (reuse == MAT_INITIAL_MATRIX) {
783     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
784     PetscCall(PetscMalloc2(nz, &row, nz, &col));
785     PetscCall(PetscMalloc1(nz, &val));
786     mumps->nnz = nz;
787     mumps->irn = row;
788     mumps->jcn = col;
789     mumps->val = mumps->val_alloc = val;
790   } else {
791     val = mumps->val;
792   }
793 
794   jj   = 0;
795   irow = rstart;
796   for (i = 0; i < m; i++) {
797     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
798     countA = ai[i + 1] - ai[i];
799     countB = bi[i + 1] - bi[i];
800     bjj    = bj + bi[i];
801     v1     = av + ai[i];
802     v2     = bv + bi[i];
803 
804     /* A-part */
805     for (j = 0; j < countA; j++) {
806       if (reuse == MAT_INITIAL_MATRIX) {
807         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
808         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
809       }
810       val[jj++] = v1[j];
811     }
812 
813     /* B-part */
814     for (j = 0; j < countB; j++) {
815       if (reuse == MAT_INITIAL_MATRIX) {
816         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
817         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
818       }
819       val[jj++] = v2[j];
820     }
821     irow++;
822   }
823   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
824   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
825   PetscFunctionReturn(PETSC_SUCCESS);
826 }
827 
828 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
829 {
830   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
831   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
832   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
833   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
834   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
835   const PetscInt     bs2 = mat->bs2;
836   PetscInt           bs;
837   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
838   PetscMUMPSInt     *row, *col;
839   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
840   PetscScalar       *val;
841 
842   PetscFunctionBegin;
843   PetscCall(MatGetBlockSize(A, &bs));
844   if (reuse == MAT_INITIAL_MATRIX) {
845     nz = bs2 * (aa->nz + bb->nz);
846     PetscCall(PetscMalloc2(nz, &row, nz, &col));
847     PetscCall(PetscMalloc1(nz, &val));
848     mumps->nnz = nz;
849     mumps->irn = row;
850     mumps->jcn = col;
851     mumps->val = mumps->val_alloc = val;
852   } else {
853     val = mumps->val;
854   }
855 
856   jj   = 0;
857   irow = rstart;
858   for (i = 0; i < mbs; i++) {
859     countA = ai[i + 1] - ai[i];
860     countB = bi[i + 1] - bi[i];
861     ajj    = aj + ai[i];
862     bjj    = bj + bi[i];
863     v1     = av + bs2 * ai[i];
864     v2     = bv + bs2 * bi[i];
865 
866     idx = 0;
867     /* A-part */
868     for (k = 0; k < countA; k++) {
869       for (j = 0; j < bs; j++) {
870         for (n = 0; n < bs; n++) {
871           if (reuse == MAT_INITIAL_MATRIX) {
872             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
873             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
874           }
875           val[jj++] = v1[idx++];
876         }
877       }
878     }
879 
880     idx = 0;
881     /* B-part */
882     for (k = 0; k < countB; k++) {
883       for (j = 0; j < bs; j++) {
884         for (n = 0; n < bs; n++) {
885           if (reuse == MAT_INITIAL_MATRIX) {
886             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
887             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
888           }
889           val[jj++] = v2[idx++];
890         }
891       }
892     }
893     irow += bs;
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
898 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
899 {
900   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
901   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
902   PetscMUMPSInt     *row, *col;
903   const PetscScalar *av, *bv, *v1, *v2;
904   PetscScalar       *val;
905   Mat                Ad, Ao;
906   Mat_SeqAIJ        *aa;
907   Mat_SeqAIJ        *bb;
908 #if defined(PETSC_USE_COMPLEX)
909   PetscBool hermitian, isset;
910 #endif
911 
912   PetscFunctionBegin;
913 #if defined(PETSC_USE_COMPLEX)
914   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
915   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
916 #endif
917   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
918   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
919   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
920 
921   aa    = (Mat_SeqAIJ *)Ad->data;
922   bb    = (Mat_SeqAIJ *)Ao->data;
923   ai    = aa->i;
924   aj    = aa->j;
925   adiag = aa->diag;
926   bi    = bb->i;
927   bj    = bb->j;
928 
929   rstart = A->rmap->rstart;
930 
931   if (reuse == MAT_INITIAL_MATRIX) {
932     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
933     nzb = 0; /* num of upper triangular entries in mat->B */
934     for (i = 0; i < m; i++) {
935       nza += (ai[i + 1] - adiag[i]);
936       countB = bi[i + 1] - bi[i];
937       bjj    = bj + bi[i];
938       for (j = 0; j < countB; j++) {
939         if (garray[bjj[j]] > rstart) nzb++;
940       }
941     }
942 
943     nz = nza + nzb; /* total nz of upper triangular part of mat */
944     PetscCall(PetscMalloc2(nz, &row, nz, &col));
945     PetscCall(PetscMalloc1(nz, &val));
946     mumps->nnz = nz;
947     mumps->irn = row;
948     mumps->jcn = col;
949     mumps->val = mumps->val_alloc = val;
950   } else {
951     val = mumps->val;
952   }
953 
954   jj   = 0;
955   irow = rstart;
956   for (i = 0; i < m; i++) {
957     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
958     v1     = av + adiag[i];
959     countA = ai[i + 1] - adiag[i];
960     countB = bi[i + 1] - bi[i];
961     bjj    = bj + bi[i];
962     v2     = bv + bi[i];
963 
964     /* A-part */
965     for (j = 0; j < countA; j++) {
966       if (reuse == MAT_INITIAL_MATRIX) {
967         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
968         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
969       }
970       val[jj++] = v1[j];
971     }
972 
973     /* B-part */
974     for (j = 0; j < countB; j++) {
975       if (garray[bjj[j]] > rstart) {
976         if (reuse == MAT_INITIAL_MATRIX) {
977           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
978           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
979         }
980         val[jj++] = v2[j];
981       }
982     }
983     irow++;
984   }
985   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
986   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
987   PetscFunctionReturn(PETSC_SUCCESS);
988 }
989 
990 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
991 {
992   const PetscScalar *av;
993   const PetscInt     M = A->rmap->n;
994   PetscCount         i;
995   PetscMUMPSInt     *row, *col;
996   Vec                v;
997 
998   PetscFunctionBegin;
999   PetscCall(MatDiagonalGetDiagonal(A, &v));
1000   PetscCall(VecGetArrayRead(v, &av));
1001   if (reuse == MAT_INITIAL_MATRIX) {
1002     PetscCall(PetscMalloc2(M, &row, M, &col));
1003     for (i = 0; i < M; i++) {
1004       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1005       col[i] = row[i];
1006     }
1007     mumps->val = (PetscScalar *)av;
1008     mumps->irn = row;
1009     mumps->jcn = col;
1010     mumps->nnz = M;
1011   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1012   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1013   PetscCall(VecRestoreArrayRead(v, &av));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
1017 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1018 {
1019   PetscScalar   *v;
1020   const PetscInt m = A->rmap->n, N = A->cmap->N;
1021   PetscInt       lda;
1022   PetscCount     i, j;
1023   PetscMUMPSInt *row, *col;
1024 
1025   PetscFunctionBegin;
1026   PetscCall(MatDenseGetArray(A, &v));
1027   PetscCall(MatDenseGetLDA(A, &lda));
1028   if (reuse == MAT_INITIAL_MATRIX) {
1029     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1030     for (i = 0; i < m; i++) {
1031       col[i] = 0;
1032       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1033     }
1034     for (j = 1; j < N; j++) {
1035       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1036       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1037     }
1038     if (lda == m) mumps->val = v;
1039     else {
1040       PetscCall(PetscMalloc1(m * N, &mumps->val));
1041       mumps->val_alloc = mumps->val;
1042       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1043     }
1044     mumps->irn = row;
1045     mumps->jcn = col;
1046     mumps->nnz = m * N;
1047   } else {
1048     if (lda == m && !mumps->nest_vals) mumps->val = v;
1049     else {
1050       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1051     }
1052   }
1053   PetscCall(MatDenseRestoreArray(A, &v));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1058 {
1059   Mat     **mats;
1060   PetscInt  nr, nc;
1061   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1062 
1063   PetscFunctionBegin;
1064   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1065   if (reuse == MAT_INITIAL_MATRIX) {
1066     PetscMUMPSInt *irns, *jcns;
1067     PetscScalar   *vals;
1068     PetscCount     totnnz, cumnnz, maxnnz;
1069     PetscInt      *pjcns_w;
1070     IS            *rows, *cols;
1071     PetscInt     **rows_idx, **cols_idx;
1072 
1073     cumnnz = 0;
1074     maxnnz = 0;
1075     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1076     for (PetscInt r = 0; r < nr; r++) {
1077       for (PetscInt c = 0; c < nc; c++) {
1078         Mat sub = mats[r][c];
1079 
1080         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1081         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1082         if (sub) {
1083           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1084           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
1085           MatInfo   info;
1086 
1087           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1088           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1089           else {
1090             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1091             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1092           }
1093           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1094           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1095           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1096           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1097           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1098           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1099           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1100           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1101 
1102           if (chol) {
1103             if (r == c) {
1104               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1105               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1106               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1107               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1108               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1109               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1110             } else {
1111               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1112               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1113               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1114               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1115               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1116               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1117             }
1118           } else {
1119             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1120             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1121             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1122             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1123             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1124             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1125           }
1126           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1127           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1128           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1129           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1130           maxnnz = PetscMax(maxnnz, info.nz_used);
1131         }
1132       }
1133     }
1134 
1135     /* Allocate total COO */
1136     totnnz = cumnnz;
1137     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1138     PetscCall(PetscMalloc1(totnnz, &vals));
1139 
1140     /* Handle rows and column maps
1141        We directly map rows and use an SF for the columns */
1142     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1143     PetscCall(MatNestGetISs(A, rows, cols));
1144     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1145     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1146     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1147     else (void)maxnnz;
1148 
1149     cumnnz = 0;
1150     for (PetscInt r = 0; r < nr; r++) {
1151       for (PetscInt c = 0; c < nc; c++) {
1152         Mat             sub  = mats[r][c];
1153         const PetscInt *ridx = rows_idx[r];
1154         const PetscInt *cidx = cols_idx[c];
1155         PetscInt        rst;
1156         PetscSF         csf;
1157         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1158         PetscLayout     cmap;
1159         PetscInt        innz;
1160 
1161         mumps->nest_vals_start[r * nc + c] = cumnnz;
1162         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1163 
1164         /* Extract inner blocks if needed */
1165         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1166         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1167         else {
1168           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1169           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1170         }
1171         swap = (PetscBool)(isTrans || isHTrans);
1172 
1173         /* Get column layout to map off-process columns */
1174         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1175 
1176         /* Get row start to map on-process rows */
1177         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1178 
1179         /* Directly use the mumps datastructure and use C ordering for now */
1180         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1181 
1182         /* Swap the role of rows and columns indices for transposed blocks
1183            since we need values with global final ordering */
1184         if (swap) {
1185           cidx = rows_idx[r];
1186           ridx = cols_idx[c];
1187         }
1188 
1189         /* Communicate column indices
1190            This could have been done with a single SF but it would have complicated the code a lot.
1191            But since we do it only once, we pay the price of setting up an SF for each block */
1192         if (PetscDefined(USE_64BIT_INDICES)) {
1193           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1194         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1195         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1196         PetscCall(PetscIntCast(mumps->nnz, &innz));
1197         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1198         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1199         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1200         PetscCall(PetscSFDestroy(&csf));
1201 
1202         /* Import indices: use direct map for rows and mapped indices for columns */
1203         if (swap) {
1204           for (PetscInt k = 0; k < mumps->nnz; k++) {
1205             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1206             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1207           }
1208         } else {
1209           for (PetscInt k = 0; k < mumps->nnz; k++) {
1210             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1211             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1212           }
1213         }
1214 
1215         /* Import values to full COO */
1216         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1217         if (isHTrans) { /* conjugate the entries */
1218           PetscScalar *v = vals + cumnnz;
1219           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
1220         }
1221 
1222         /* Shift new starting point and sanity check */
1223         cumnnz += mumps->nnz;
1224         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1225 
1226         /* Free scratch memory */
1227         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1228         PetscCall(PetscFree(mumps->val_alloc));
1229         mumps->val = NULL;
1230         mumps->nnz = 0;
1231       }
1232     }
1233     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1234     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1235     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1236     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1237     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1238     mumps->nest_vals_start[nr * nc] = cumnnz;
1239 
1240     /* Set pointers for final MUMPS data structure */
1241     mumps->nest_vals = vals;
1242     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1243     mumps->val       = vals;
1244     mumps->irn       = irns;
1245     mumps->jcn       = jcns;
1246     mumps->nnz       = cumnnz;
1247   } else {
1248     PetscScalar *oval = mumps->nest_vals;
1249     for (PetscInt r = 0; r < nr; r++) {
1250       for (PetscInt c = 0; c < nc; c++) {
1251         PetscBool isTrans, isHTrans = PETSC_FALSE;
1252         Mat       sub  = mats[r][c];
1253         PetscInt  midx = r * nc + c;
1254 
1255         if (!mumps->nest_convert_to_triples[midx]) continue;
1256         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1257         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1258         else {
1259           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1260           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1261         }
1262         mumps->val = oval + mumps->nest_vals_start[midx];
1263         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1264         if (isHTrans) {
1265           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1266           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1267         }
1268       }
1269     }
1270     mumps->val = oval;
1271   }
1272   PetscFunctionReturn(PETSC_SUCCESS);
1273 }
1274 
1275 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1276 {
1277   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1278 
1279   PetscFunctionBegin;
1280   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1281   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1282   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1283   PetscCall(VecDestroy(&mumps->b_seq));
1284   PetscCall(VecDestroy(&mumps->x_seq));
1285   PetscCall(PetscFree(mumps->id.perm_in));
1286   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1287   PetscCall(PetscFree(mumps->val_alloc));
1288   PetscCall(PetscFree(mumps->info));
1289   PetscCall(PetscFree(mumps->ICNTL_pre));
1290   PetscCall(PetscFree(mumps->CNTL_pre));
1291   PetscCall(MatMumpsResetSchur_Private(mumps));
1292   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1293     mumps->id.job = JOB_END;
1294     PetscMUMPS_c(mumps);
1295     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1296     if (mumps->mumps_comm != MPI_COMM_NULL) {
1297       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1298       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1299     }
1300   }
1301 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1302   if (mumps->use_petsc_omp_support) {
1303     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1304     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1305     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1306   }
1307 #endif
1308   PetscCall(PetscFree(mumps->ia_alloc));
1309   PetscCall(PetscFree(mumps->ja_alloc));
1310   PetscCall(PetscFree(mumps->recvcount));
1311   PetscCall(PetscFree(mumps->reqs));
1312   PetscCall(PetscFree(mumps->irhs_loc));
1313   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1314   PetscCall(PetscFree(mumps->nest_vals));
1315   PetscCall(PetscFree(A->data));
1316 
1317   /* clear composed functions */
1318   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1321   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1322   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1323   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1324   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1325   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1326   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1327   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1328   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1329   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1330   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1331   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1332   PetscFunctionReturn(PETSC_SUCCESS);
1333 }
1334 
1335 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1336 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1337 {
1338   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1339   const PetscMPIInt ompsize = mumps->omp_comm_size;
1340   PetscInt          i, m, M, rstart;
1341 
1342   PetscFunctionBegin;
1343   PetscCall(MatGetSize(A, &M, NULL));
1344   PetscCall(MatGetLocalSize(A, &m, NULL));
1345   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1346   if (ompsize == 1) {
1347     if (!mumps->irhs_loc) {
1348       mumps->nloc_rhs = (PetscMUMPSInt)m;
1349       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1350       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1351       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1352     }
1353     mumps->id.rhs_loc = (MumpsScalar *)array;
1354   } else {
1355 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1356     const PetscInt *ranges;
1357     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1358     MPI_Group       petsc_group, omp_group;
1359     PetscScalar    *recvbuf = NULL;
1360 
1361     if (mumps->is_omp_master) {
1362       /* Lazily initialize the omp stuff for distributed rhs */
1363       if (!mumps->irhs_loc) {
1364         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1365         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1366         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1367         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1368         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1369         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1370 
1371         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1372         mumps->nloc_rhs = 0;
1373         PetscCall(MatGetOwnershipRanges(A, &ranges));
1374         for (j = 0; j < ompsize; j++) {
1375           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1376           mumps->nloc_rhs += mumps->rhs_nrow[j];
1377         }
1378         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1379         for (j = k = 0; j < ompsize; j++) {
1380           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1381         }
1382 
1383         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1384         PetscCallMPI(MPI_Group_free(&petsc_group));
1385         PetscCallMPI(MPI_Group_free(&omp_group));
1386       }
1387 
1388       /* Realloc buffers when current nrhs is bigger than what we have met */
1389       if (nrhs > mumps->max_nrhs) {
1390         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1391         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1392         mumps->max_nrhs = nrhs;
1393       }
1394 
1395       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1396       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1397       mumps->rhs_disps[0] = 0;
1398       for (j = 1; j < ompsize; j++) {
1399         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1400         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1401       }
1402       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1403     }
1404 
1405     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1406     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1407 
1408     if (mumps->is_omp_master) {
1409       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1410         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1411         for (j = 0; j < ompsize; j++) {
1412           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1413           dst                    = dstbase;
1414           for (i = 0; i < nrhs; i++) {
1415             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1416             src += mumps->rhs_nrow[j];
1417             dst += mumps->nloc_rhs;
1418           }
1419           dstbase += mumps->rhs_nrow[j];
1420         }
1421       }
1422       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1423     }
1424 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1425   }
1426   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1427   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1428   mumps->id.lrhs_loc = mumps->nloc_rhs;
1429   mumps->id.irhs_loc = mumps->irhs_loc;
1430   PetscFunctionReturn(PETSC_SUCCESS);
1431 }
1432 
1433 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1434 {
1435   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1436   const PetscScalar *rarray = NULL;
1437   PetscScalar       *array;
1438   IS                 is_iden, is_petsc;
1439   PetscInt           i;
1440   PetscBool          second_solve = PETSC_FALSE;
1441   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1442 
1443   PetscFunctionBegin;
1444   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1445                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1446                                    &cite1));
1447   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1448                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1449                                    &cite2));
1450 
1451   PetscCall(VecFlag(x, A->factorerrortype));
1452   if (A->factorerrortype) {
1453     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1454     PetscFunctionReturn(PETSC_SUCCESS);
1455   }
1456 
1457   mumps->id.nrhs = 1;
1458   if (mumps->petsc_size > 1) {
1459     if (mumps->ICNTL20 == 10) {
1460       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1461       PetscCall(VecGetArrayRead(b, &rarray));
1462       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1463     } else {
1464       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1465       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1466       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1467       if (!mumps->myid) {
1468         PetscCall(VecGetArray(mumps->b_seq, &array));
1469         mumps->id.rhs = (MumpsScalar *)array;
1470       }
1471     }
1472   } else {                   /* petsc_size == 1 */
1473     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1474     PetscCall(VecCopy(b, x));
1475     PetscCall(VecGetArray(x, &array));
1476     mumps->id.rhs = (MumpsScalar *)array;
1477   }
1478 
1479   /*
1480      handle condensation step of Schur complement (if any)
1481      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1482      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1483      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1484      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1485   */
1486   if (mumps->id.size_schur > 0) {
1487     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1488     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1489       second_solve = PETSC_TRUE;
1490       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1491       mumps->id.ICNTL(26) = 1; /* condensation phase */
1492     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1493   }
1494   /* solve phase */
1495   mumps->id.job = JOB_SOLVE;
1496   PetscMUMPS_c(mumps);
1497   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1498 
1499   /* handle expansion step of Schur complement (if any) */
1500   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1501   else if (mumps->id.ICNTL(26) == 1) {
1502     PetscCall(MatMumpsSolveSchur_Private(A));
1503     for (i = 0; i < mumps->id.size_schur; ++i) {
1504 #if !defined(PETSC_USE_COMPLEX)
1505       PetscScalar val = mumps->id.redrhs[i];
1506 #else
1507       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1508 #endif
1509       array[mumps->id.listvar_schur[i] - 1] = val;
1510     }
1511   }
1512 
1513   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1514     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1515       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1516       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1517     }
1518     if (!mumps->scat_sol) { /* create scatter scat_sol */
1519       PetscInt *isol2_loc = NULL;
1520       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1521       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1522       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1523       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1524       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1525       PetscCall(ISDestroy(&is_iden));
1526       PetscCall(ISDestroy(&is_petsc));
1527       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1528     }
1529 
1530     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1531     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1532   }
1533 
1534   if (mumps->petsc_size > 1) {
1535     if (mumps->ICNTL20 == 10) {
1536       PetscCall(VecRestoreArrayRead(b, &rarray));
1537     } else if (!mumps->myid) {
1538       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1539     }
1540   } else PetscCall(VecRestoreArray(x, &array));
1541 
1542   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 
1546 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1547 {
1548   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1549   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1550 
1551   PetscFunctionBegin;
1552   mumps->id.ICNTL(9) = 0;
1553   PetscCall(MatSolve_MUMPS(A, b, x));
1554   mumps->id.ICNTL(9) = value;
1555   PetscFunctionReturn(PETSC_SUCCESS);
1556 }
1557 
1558 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1559 {
1560   Mat                Bt = NULL;
1561   PetscBool          denseX, denseB, flg, flgT;
1562   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1563   PetscInt           i, nrhs, M, nrhsM;
1564   PetscScalar       *array;
1565   const PetscScalar *rbray;
1566   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1567   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1568   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1569   IS                 is_to, is_from;
1570   PetscInt           k, proc, j, m, myrstart;
1571   const PetscInt    *rstart;
1572   Vec                v_mpi, msol_loc;
1573   VecScatter         scat_sol;
1574   Vec                b_seq;
1575   VecScatter         scat_rhs;
1576   PetscScalar       *aa;
1577   PetscInt           spnr, *ia, *ja;
1578   Mat_MPIAIJ        *b = NULL;
1579 
1580   PetscFunctionBegin;
1581   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1582   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1583 
1584   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1585   if (denseB) {
1586     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1587     mumps->id.ICNTL(20) = 0; /* dense RHS */
1588   } else {                   /* sparse B */
1589     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1590     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1591     if (flgT) { /* input B is transpose of actual RHS matrix,
1592                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1593       PetscCall(MatTransposeGetMat(B, &Bt));
1594     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1595     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1596   }
1597 
1598   PetscCall(MatGetSize(B, &M, &nrhs));
1599   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
1600   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
1601   mumps->id.lrhs = (PetscMUMPSInt)M;
1602   mumps->id.rhs  = NULL;
1603 
1604   if (mumps->petsc_size == 1) {
1605     PetscScalar *aa;
1606     PetscInt     spnr, *ia, *ja;
1607     PetscBool    second_solve = PETSC_FALSE;
1608 
1609     PetscCall(MatDenseGetArray(X, &array));
1610     mumps->id.rhs = (MumpsScalar *)array;
1611 
1612     if (denseB) {
1613       /* copy B to X */
1614       PetscCall(MatDenseGetArrayRead(B, &rbray));
1615       PetscCall(PetscArraycpy(array, rbray, nrhsM));
1616       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1617     } else { /* sparse B */
1618       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1619       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1620       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1621       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1622       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1623     }
1624     /* handle condensation step of Schur complement (if any) */
1625     if (mumps->id.size_schur > 0) {
1626       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1627         second_solve = PETSC_TRUE;
1628         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1629         mumps->id.ICNTL(26) = 1; /* condensation phase */
1630       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1631     }
1632     /* solve phase */
1633     mumps->id.job = JOB_SOLVE;
1634     PetscMUMPS_c(mumps);
1635     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1636 
1637     /* handle expansion step of Schur complement (if any) */
1638     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1639     else if (mumps->id.ICNTL(26) == 1) {
1640       PetscCall(MatMumpsSolveSchur_Private(A));
1641       for (j = 0; j < nrhs; ++j)
1642         for (i = 0; i < mumps->id.size_schur; ++i) {
1643 #if !defined(PETSC_USE_COMPLEX)
1644           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1645 #else
1646           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1647 #endif
1648           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1649         }
1650     }
1651     if (!denseB) { /* sparse B */
1652       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1653       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1654       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1655     }
1656     PetscCall(MatDenseRestoreArray(X, &array));
1657     PetscFunctionReturn(PETSC_SUCCESS);
1658   }
1659 
1660   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1661   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1662 
1663   /* create msol_loc to hold mumps local solution */
1664   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1665   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1666 
1667   lsol_loc = mumps->id.lsol_loc;
1668   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
1669   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1670   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1671   mumps->id.isol_loc = isol_loc;
1672 
1673   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1674 
1675   if (denseB) {
1676     if (mumps->ICNTL20 == 10) {
1677       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1678       PetscCall(MatDenseGetArrayRead(B, &rbray));
1679       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1680       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1681       PetscCall(MatGetLocalSize(B, &m, NULL));
1682       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi));
1683     } else {
1684       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1685       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1686         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1687         0, re-arrange B into desired order, which is a local operation.
1688       */
1689 
1690       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1691       /* wrap dense rhs matrix B into a vector v_mpi */
1692       PetscCall(MatGetLocalSize(B, &m, NULL));
1693       PetscCall(MatDenseGetArray(B, &bray));
1694       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi));
1695       PetscCall(MatDenseRestoreArray(B, &bray));
1696 
1697       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1698       if (!mumps->myid) {
1699         PetscInt *idx;
1700         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1701         PetscCall(PetscMalloc1(nrhsM, &idx));
1702         PetscCall(MatGetOwnershipRanges(B, &rstart));
1703         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
1704           for (j = 0; j < nrhs; j++) {
1705             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1706           }
1707         }
1708 
1709         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
1710         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
1711         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
1712       } else {
1713         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1714         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1715         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1716       }
1717       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1718       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1719       PetscCall(ISDestroy(&is_to));
1720       PetscCall(ISDestroy(&is_from));
1721       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1722 
1723       if (!mumps->myid) { /* define rhs on the host */
1724         PetscCall(VecGetArray(b_seq, &bray));
1725         mumps->id.rhs = (MumpsScalar *)bray;
1726         PetscCall(VecRestoreArray(b_seq, &bray));
1727       }
1728     }
1729   } else { /* sparse B */
1730     b = (Mat_MPIAIJ *)Bt->data;
1731 
1732     /* wrap dense X into a vector v_mpi */
1733     PetscCall(MatGetLocalSize(X, &m, NULL));
1734     PetscCall(MatDenseGetArray(X, &bray));
1735     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi));
1736     PetscCall(MatDenseRestoreArray(X, &bray));
1737 
1738     if (!mumps->myid) {
1739       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1740       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1741       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1742       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1743       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1744     } else {
1745       mumps->id.irhs_ptr    = NULL;
1746       mumps->id.irhs_sparse = NULL;
1747       mumps->id.nz_rhs      = 0;
1748       mumps->id.rhs_sparse  = NULL;
1749     }
1750   }
1751 
1752   /* solve phase */
1753   mumps->id.job = JOB_SOLVE;
1754   PetscMUMPS_c(mumps);
1755   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1756 
1757   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
1758   PetscCall(MatDenseGetArray(X, &array));
1759   PetscCall(VecPlaceArray(v_mpi, array));
1760 
1761   /* create scatter scat_sol */
1762   PetscCall(MatGetOwnershipRanges(X, &rstart));
1763   /* iidx: index for scatter mumps solution to PETSc X */
1764 
1765   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1766   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1767   for (i = 0; i < lsol_loc; i++) {
1768     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1769 
1770     for (proc = 0; proc < mumps->petsc_size; proc++) {
1771       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1772         myrstart = rstart[proc];
1773         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
1774         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
1775         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1776         break;
1777       }
1778     }
1779 
1780     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1781   }
1782   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1783   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1784   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1785   PetscCall(ISDestroy(&is_from));
1786   PetscCall(ISDestroy(&is_to));
1787   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1788   PetscCall(MatDenseRestoreArray(X, &array));
1789 
1790   /* free spaces */
1791   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1792   mumps->id.isol_loc = isol_loc_save;
1793 
1794   PetscCall(PetscFree2(sol_loc, isol_loc));
1795   PetscCall(PetscFree(idxx));
1796   PetscCall(VecDestroy(&msol_loc));
1797   PetscCall(VecDestroy(&v_mpi));
1798   if (!denseB) {
1799     if (!mumps->myid) {
1800       b = (Mat_MPIAIJ *)Bt->data;
1801       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1802       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1803       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1804     }
1805   } else {
1806     if (mumps->ICNTL20 == 0) {
1807       PetscCall(VecDestroy(&b_seq));
1808       PetscCall(VecScatterDestroy(&scat_rhs));
1809     }
1810   }
1811   PetscCall(VecScatterDestroy(&scat_sol));
1812   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1813   PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815 
1816 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1817 {
1818   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1819   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1820 
1821   PetscFunctionBegin;
1822   mumps->id.ICNTL(9) = 0;
1823   PetscCall(MatMatSolve_MUMPS(A, B, X));
1824   mumps->id.ICNTL(9) = value;
1825   PetscFunctionReturn(PETSC_SUCCESS);
1826 }
1827 
1828 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1829 {
1830   PetscBool flg;
1831   Mat       B;
1832 
1833   PetscFunctionBegin;
1834   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1835   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1836 
1837   /* Create B=Bt^T that uses Bt's data structure */
1838   PetscCall(MatCreateTranspose(Bt, &B));
1839 
1840   PetscCall(MatMatSolve_MUMPS(A, B, X));
1841   PetscCall(MatDestroy(&B));
1842   PetscFunctionReturn(PETSC_SUCCESS);
1843 }
1844 
1845 #if !defined(PETSC_USE_COMPLEX)
1846 /*
1847   input:
1848    F:        numeric factor
1849   output:
1850    nneg:     total number of negative pivots
1851    nzero:    total number of zero pivots
1852    npos:     (global dimension of F) - nneg - nzero
1853 */
1854 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1855 {
1856   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1857   PetscMPIInt size;
1858 
1859   PetscFunctionBegin;
1860   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1861   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1862   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1863 
1864   if (nneg) *nneg = mumps->id.INFOG(12);
1865   if (nzero || npos) {
1866     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1867     if (nzero) *nzero = mumps->id.INFOG(28);
1868     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1869   }
1870   PetscFunctionReturn(PETSC_SUCCESS);
1871 }
1872 #endif
1873 
1874 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1875 {
1876   PetscMPIInt    nreqs;
1877   PetscMUMPSInt *irn, *jcn;
1878   PetscMPIInt    count;
1879   PetscCount     totnnz, remain;
1880   const PetscInt osize = mumps->omp_comm_size;
1881   PetscScalar   *val;
1882 
1883   PetscFunctionBegin;
1884   if (osize > 1) {
1885     if (reuse == MAT_INITIAL_MATRIX) {
1886       /* master first gathers counts of nonzeros to receive */
1887       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1888       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1889 
1890       /* Then each computes number of send/recvs */
1891       if (mumps->is_omp_master) {
1892         /* Start from 1 since self communication is not done in MPI */
1893         nreqs = 0;
1894         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1895       } else {
1896         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
1897       }
1898       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1899 
1900       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1901          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1902          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1903          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1904        */
1905       nreqs = 0; /* counter for actual send/recvs */
1906       if (mumps->is_omp_master) {
1907         totnnz = 0;
1908 
1909         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1910         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1911         PetscCall(PetscMalloc1(totnnz, &val));
1912 
1913         /* Self communication */
1914         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1915         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1916         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1917 
1918         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1919         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1920         PetscCall(PetscFree(mumps->val_alloc));
1921         mumps->nnz = totnnz;
1922         mumps->irn = irn;
1923         mumps->jcn = jcn;
1924         mumps->val = mumps->val_alloc = val;
1925 
1926         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1927         jcn += mumps->recvcount[0];
1928         val += mumps->recvcount[0];
1929 
1930         /* Remote communication */
1931         for (PetscMPIInt i = 1; i < osize; i++) {
1932           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
1933           remain = mumps->recvcount[i] - count;
1934           while (count > 0) {
1935             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1936             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1937             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1938             irn += count;
1939             jcn += count;
1940             val += count;
1941             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1942             remain -= count;
1943           }
1944         }
1945       } else {
1946         irn    = mumps->irn;
1947         jcn    = mumps->jcn;
1948         val    = mumps->val;
1949         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
1950         remain = mumps->nnz - count;
1951         while (count > 0) {
1952           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1953           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1954           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1955           irn += count;
1956           jcn += count;
1957           val += count;
1958           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1959           remain -= count;
1960         }
1961       }
1962     } else {
1963       nreqs = 0;
1964       if (mumps->is_omp_master) {
1965         val = mumps->val + mumps->recvcount[0];
1966         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1967           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
1968           remain = mumps->recvcount[i] - count;
1969           while (count > 0) {
1970             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1971             val += count;
1972             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1973             remain -= count;
1974           }
1975         }
1976       } else {
1977         val    = mumps->val;
1978         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
1979         remain = mumps->nnz - count;
1980         while (count > 0) {
1981           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1982           val += count;
1983           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1984           remain -= count;
1985         }
1986       }
1987     }
1988     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1989     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1990   }
1991   PetscFunctionReturn(PETSC_SUCCESS);
1992 }
1993 
1994 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
1995 {
1996   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
1997   PetscBool  isMPIAIJ;
1998 
1999   PetscFunctionBegin;
2000   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2001     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2002     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2003     PetscFunctionReturn(PETSC_SUCCESS);
2004   }
2005 
2006   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2007   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2008 
2009   /* numerical factorization phase */
2010   mumps->id.job = JOB_FACTNUMERIC;
2011   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2012     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2013   } else {
2014     mumps->id.a_loc = (MumpsScalar *)mumps->val;
2015   }
2016   PetscMUMPS_c(mumps);
2017   if (mumps->id.INFOG(1) < 0) {
2018     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2019     if (mumps->id.INFOG(1) == -10) {
2020       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2021       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2022     } else if (mumps->id.INFOG(1) == -13) {
2023       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2024       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2025     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2026       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2027       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2028     } else {
2029       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2030       F->factorerrortype = MAT_FACTOR_OTHER;
2031     }
2032   }
2033   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2034 
2035   F->assembled = PETSC_TRUE;
2036 
2037   if (F->schur) { /* reset Schur status to unfactored */
2038 #if defined(PETSC_HAVE_CUDA)
2039     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2040 #endif
2041     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2042       mumps->id.ICNTL(19) = 2;
2043       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2044     }
2045     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2046   }
2047 
2048   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2049   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2050 
2051   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2052   if (mumps->petsc_size > 1) {
2053     PetscInt     lsol_loc;
2054     PetscScalar *sol_loc;
2055 
2056     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2057 
2058     /* distributed solution; Create x_seq=sol_loc for repeated use */
2059     if (mumps->x_seq) {
2060       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2061       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2062       PetscCall(VecDestroy(&mumps->x_seq));
2063     }
2064     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2065     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2066     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2067     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2068     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2069   }
2070   PetscCall(PetscLogFlops((double)mumps->id.RINFO(2)));
2071   PetscFunctionReturn(PETSC_SUCCESS);
2072 }
2073 
2074 /* Sets MUMPS options from the options database */
2075 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2076 {
2077   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2078   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2079   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2080   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2081   MumpsScalar  *arr;
2082 
2083   PetscFunctionBegin;
2084   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2085   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2086     PetscInt nthreads   = 0;
2087     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2088     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2089 
2090     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2091     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2092     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2093 
2094     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2095     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2096     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2097     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2098     if (mumps->use_petsc_omp_support) {
2099       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2100 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2101       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2102       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2103 #else
2104       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2105               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2106 #endif
2107     } else {
2108       mumps->omp_comm      = PETSC_COMM_SELF;
2109       mumps->mumps_comm    = mumps->petsc_comm;
2110       mumps->is_omp_master = PETSC_TRUE;
2111     }
2112     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2113     mumps->reqs = NULL;
2114     mumps->tag  = 0;
2115 
2116     if (mumps->mumps_comm != MPI_COMM_NULL) {
2117       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2118         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2119         MPI_Comm comm;
2120         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2121         mumps->mumps_comm = comm;
2122       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2123     }
2124 
2125     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2126     mumps->id.job          = JOB_INIT;
2127     mumps->id.par          = 1; /* host participates factorizaton and solve */
2128     mumps->id.sym          = mumps->sym;
2129 
2130     size          = mumps->id.size_schur;
2131     arr           = mumps->id.schur;
2132     listvar_schur = mumps->id.listvar_schur;
2133     PetscMUMPS_c(mumps);
2134     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2135 
2136     /* set PETSc-MUMPS default options - override MUMPS default */
2137     mumps->id.ICNTL(3) = 0;
2138     mumps->id.ICNTL(4) = 0;
2139     if (mumps->petsc_size == 1) {
2140       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2141       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2142     } else {
2143       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2144       mumps->id.ICNTL(21) = 1; /* distributed solution */
2145     }
2146 
2147     /* restore cached ICNTL and CNTL values */
2148     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2149     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2150     PetscCall(PetscFree(mumps->ICNTL_pre));
2151     PetscCall(PetscFree(mumps->CNTL_pre));
2152 
2153     if (schur) {
2154       mumps->id.size_schur    = size;
2155       mumps->id.schur_lld     = size;
2156       mumps->id.schur         = arr;
2157       mumps->id.listvar_schur = listvar_schur;
2158       if (mumps->petsc_size > 1) {
2159         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2160 
2161         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2162         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2163         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2164         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2165       } else {
2166         if (F->factortype == MAT_FACTOR_LU) {
2167           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2168         } else {
2169           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2170         }
2171       }
2172       mumps->id.ICNTL(26) = -1;
2173     }
2174 
2175     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2176        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2177      */
2178     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2179     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2180 
2181     mumps->scat_rhs = NULL;
2182     mumps->scat_sol = NULL;
2183   }
2184   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2185   if (flg) mumps->id.ICNTL(1) = icntl;
2186   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2187   if (flg) mumps->id.ICNTL(2) = icntl;
2188   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2189   if (flg) mumps->id.ICNTL(3) = icntl;
2190 
2191   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2192   if (flg) mumps->id.ICNTL(4) = icntl;
2193   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2194 
2195   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2196   if (flg) mumps->id.ICNTL(6) = icntl;
2197 
2198   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2199   if (flg) {
2200     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2201     mumps->id.ICNTL(7) = icntl;
2202   }
2203 
2204   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2205   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2206   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2207   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2208   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2209   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2210   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2211   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2212   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2213   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2214   if (flg) {
2215     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2216     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2217   }
2218   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2219   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2220     PetscCall(MatDestroy(&F->schur));
2221     PetscCall(MatMumpsResetSchur_Private(mumps));
2222   }
2223 
2224   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2225      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2226      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2227      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2228      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2229      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2230    */
2231   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2232 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2233   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2234 #endif
2235   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2236   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2237 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2238   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2239 #endif
2240   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2241 
2242   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2243   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2244   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2245   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2246 
2247   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2248   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2249   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2250   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2251   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2252   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2253   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2254   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2255   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2256   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2257   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2258   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2259   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2260   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2261   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2262 
2263   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2264   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2265   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2266   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2267   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2268   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2269 
2270   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2271 
2272   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2273   if (ninfo) {
2274     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2275     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2276     mumps->ninfo = ninfo;
2277     for (i = 0; i < ninfo; i++) {
2278       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2279       mumps->info[i] = info[i];
2280     }
2281   }
2282   PetscOptionsEnd();
2283   PetscFunctionReturn(PETSC_SUCCESS);
2284 }
2285 
2286 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2287 {
2288   PetscFunctionBegin;
2289   if (mumps->id.INFOG(1) < 0) {
2290     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2291     if (mumps->id.INFOG(1) == -6) {
2292       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2293       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2294     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2295       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2296       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2297     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
2298       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2299     } else {
2300       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2301       F->factorerrortype = MAT_FACTOR_OTHER;
2302     }
2303   }
2304   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2305   PetscFunctionReturn(PETSC_SUCCESS);
2306 }
2307 
2308 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2309 {
2310   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2311   Vec            b;
2312   const PetscInt M = A->rmap->N;
2313 
2314   PetscFunctionBegin;
2315   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2316     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2317     PetscFunctionReturn(PETSC_SUCCESS);
2318   }
2319 
2320   /* Set MUMPS options from the options database */
2321   PetscCall(MatSetFromOptions_MUMPS(F, A));
2322 
2323   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2324   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2325 
2326   /* analysis phase */
2327   mumps->id.job = JOB_FACTSYMBOLIC;
2328   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2329   switch (mumps->id.ICNTL(18)) {
2330   case 0: /* centralized assembled matrix input */
2331     if (!mumps->myid) {
2332       mumps->id.nnz = mumps->nnz;
2333       mumps->id.irn = mumps->irn;
2334       mumps->id.jcn = mumps->jcn;
2335       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2336       if (r && mumps->id.ICNTL(7) == 7) {
2337         mumps->id.ICNTL(7) = 1;
2338         if (!mumps->myid) {
2339           const PetscInt *idx;
2340           PetscInt        i;
2341 
2342           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2343           PetscCall(ISGetIndices(r, &idx));
2344           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2345           PetscCall(ISRestoreIndices(r, &idx));
2346         }
2347       }
2348     }
2349     break;
2350   case 3: /* distributed assembled matrix input (size>1) */
2351     mumps->id.nnz_loc = mumps->nnz;
2352     mumps->id.irn_loc = mumps->irn;
2353     mumps->id.jcn_loc = mumps->jcn;
2354     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2355     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2356       PetscCall(MatCreateVecs(A, NULL, &b));
2357       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2358       PetscCall(VecDestroy(&b));
2359     }
2360     break;
2361   }
2362   PetscMUMPS_c(mumps);
2363   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2364 
2365   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2366   F->ops->solve             = MatSolve_MUMPS;
2367   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2368   F->ops->matsolve          = MatMatSolve_MUMPS;
2369   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2370   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2371 
2372   mumps->matstruc = SAME_NONZERO_PATTERN;
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 
2376 /* Note the PETSc r and c permutations are ignored */
2377 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2378 {
2379   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2380   Vec            b;
2381   const PetscInt M = A->rmap->N;
2382 
2383   PetscFunctionBegin;
2384   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2385     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2386     PetscFunctionReturn(PETSC_SUCCESS);
2387   }
2388 
2389   /* Set MUMPS options from the options database */
2390   PetscCall(MatSetFromOptions_MUMPS(F, A));
2391 
2392   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2393   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2394 
2395   /* analysis phase */
2396   mumps->id.job = JOB_FACTSYMBOLIC;
2397   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2398   switch (mumps->id.ICNTL(18)) {
2399   case 0: /* centralized assembled matrix input */
2400     if (!mumps->myid) {
2401       mumps->id.nnz = mumps->nnz;
2402       mumps->id.irn = mumps->irn;
2403       mumps->id.jcn = mumps->jcn;
2404       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2405     }
2406     break;
2407   case 3: /* distributed assembled matrix input (size>1) */
2408     mumps->id.nnz_loc = mumps->nnz;
2409     mumps->id.irn_loc = mumps->irn;
2410     mumps->id.jcn_loc = mumps->jcn;
2411     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2412     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2413       PetscCall(MatCreateVecs(A, NULL, &b));
2414       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2415       PetscCall(VecDestroy(&b));
2416     }
2417     break;
2418   }
2419   PetscMUMPS_c(mumps);
2420   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2421 
2422   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2423   F->ops->solve             = MatSolve_MUMPS;
2424   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2425   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2426 
2427   mumps->matstruc = SAME_NONZERO_PATTERN;
2428   PetscFunctionReturn(PETSC_SUCCESS);
2429 }
2430 
2431 /* Note the PETSc r permutation and factor info are ignored */
2432 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
2433 {
2434   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2435   Vec            b;
2436   const PetscInt M = A->rmap->N;
2437 
2438   PetscFunctionBegin;
2439   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2440     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2441     PetscFunctionReturn(PETSC_SUCCESS);
2442   }
2443 
2444   /* Set MUMPS options from the options database */
2445   PetscCall(MatSetFromOptions_MUMPS(F, A));
2446 
2447   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2448   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2449 
2450   /* analysis phase */
2451   mumps->id.job = JOB_FACTSYMBOLIC;
2452   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2453   switch (mumps->id.ICNTL(18)) {
2454   case 0: /* centralized assembled matrix input */
2455     if (!mumps->myid) {
2456       mumps->id.nnz = mumps->nnz;
2457       mumps->id.irn = mumps->irn;
2458       mumps->id.jcn = mumps->jcn;
2459       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2460     }
2461     break;
2462   case 3: /* distributed assembled matrix input (size>1) */
2463     mumps->id.nnz_loc = mumps->nnz;
2464     mumps->id.irn_loc = mumps->irn;
2465     mumps->id.jcn_loc = mumps->jcn;
2466     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2467     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2468       PetscCall(MatCreateVecs(A, NULL, &b));
2469       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2470       PetscCall(VecDestroy(&b));
2471     }
2472     break;
2473   }
2474   PetscMUMPS_c(mumps);
2475   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2476 
2477   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2478   F->ops->solve                 = MatSolve_MUMPS;
2479   F->ops->solvetranspose        = MatSolve_MUMPS;
2480   F->ops->matsolve              = MatMatSolve_MUMPS;
2481   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2482   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2483 #if defined(PETSC_USE_COMPLEX)
2484   F->ops->getinertia = NULL;
2485 #else
2486   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2487 #endif
2488 
2489   mumps->matstruc = SAME_NONZERO_PATTERN;
2490   PetscFunctionReturn(PETSC_SUCCESS);
2491 }
2492 
2493 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2494 {
2495   PetscBool         iascii;
2496   PetscViewerFormat format;
2497   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2498 
2499   PetscFunctionBegin;
2500   /* check if matrix is mumps type */
2501   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2502 
2503   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2504   if (iascii) {
2505     PetscCall(PetscViewerGetFormat(viewer, &format));
2506     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2507       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2508       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2509         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2510         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2511         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2512         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2513         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2514         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2515         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2516         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2517         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2518         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2519         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2520         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2521         if (mumps->id.ICNTL(11) > 0) {
2522           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)mumps->id.RINFOG(4)));
2523           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)mumps->id.RINFOG(5)));
2524           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)mumps->id.RINFOG(6)));
2525           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
2526           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)mumps->id.RINFOG(9)));
2527           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
2528         }
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2530         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2533         /* ICNTL(15-17) not used */
2534         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2535         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2537         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2540 
2541         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2543         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2547 
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2551         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2553         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
2554         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2555         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
2556         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2557 
2558         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)mumps->id.CNTL(1)));
2559         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
2560         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)mumps->id.CNTL(3)));
2561         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)mumps->id.CNTL(4)));
2562         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)mumps->id.CNTL(5)));
2563         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)mumps->id.CNTL(7)));
2564 
2565         /* information local to each processor */
2566         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2567         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2568         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
2569         PetscCall(PetscViewerFlush(viewer));
2570         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2571         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
2572         PetscCall(PetscViewerFlush(viewer));
2573         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2574         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
2575         PetscCall(PetscViewerFlush(viewer));
2576 
2577         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2578         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2579         PetscCall(PetscViewerFlush(viewer));
2580 
2581         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2582         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2583         PetscCall(PetscViewerFlush(viewer));
2584 
2585         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2586         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2587         PetscCall(PetscViewerFlush(viewer));
2588 
2589         if (mumps->ninfo && mumps->ninfo <= 80) {
2590           PetscInt i;
2591           for (i = 0; i < mumps->ninfo; i++) {
2592             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2593             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2594             PetscCall(PetscViewerFlush(viewer));
2595           }
2596         }
2597         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2598       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2599 
2600       if (mumps->myid == 0) { /* information from the host */
2601         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
2602         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
2603         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
2604         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2605 
2606         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2607         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2608         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2609         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2610         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2611         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2612         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2613         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2614         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2615         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2616         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2617         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2618         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2619         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2620         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2621         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2622         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2623         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2624         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2625         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2626         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2627         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2628         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2629         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2630         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2631         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2632         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2633         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2634         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2635         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2636         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2637         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2638         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2639         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2640       }
2641     }
2642   }
2643   PetscFunctionReturn(PETSC_SUCCESS);
2644 }
2645 
2646 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
2647 {
2648   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2649 
2650   PetscFunctionBegin;
2651   info->block_size        = 1.0;
2652   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2653   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2654   info->nz_unneeded       = 0.0;
2655   info->assemblies        = 0.0;
2656   info->mallocs           = 0.0;
2657   info->memory            = 0.0;
2658   info->fill_ratio_given  = 0;
2659   info->fill_ratio_needed = 0;
2660   info->factor_mallocs    = 0;
2661   PetscFunctionReturn(PETSC_SUCCESS);
2662 }
2663 
2664 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2665 {
2666   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2667   const PetscScalar *arr;
2668   const PetscInt    *idxs;
2669   PetscInt           size, i;
2670 
2671   PetscFunctionBegin;
2672   PetscCall(ISGetLocalSize(is, &size));
2673   /* Schur complement matrix */
2674   PetscCall(MatDestroy(&F->schur));
2675   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2676   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2677   mumps->id.schur = (MumpsScalar *)arr;
2678   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
2679   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
2680   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2681   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2682 
2683   /* MUMPS expects Fortran style indices */
2684   PetscCall(PetscFree(mumps->id.listvar_schur));
2685   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2686   PetscCall(ISGetIndices(is, &idxs));
2687   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
2688   PetscCall(ISRestoreIndices(is, &idxs));
2689   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2690   mumps->id.ICNTL(26) = -1;
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2695 {
2696   Mat          St;
2697   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2698   PetscScalar *array;
2699 
2700   PetscFunctionBegin;
2701   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2702   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2703   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2704   PetscCall(MatSetType(St, MATDENSE));
2705   PetscCall(MatSetUp(St));
2706   PetscCall(MatDenseGetArray(St, &array));
2707   if (!mumps->sym) {                /* MUMPS always return a full matrix */
2708     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2709       PetscInt i, j, N = mumps->id.size_schur;
2710       for (i = 0; i < N; i++) {
2711         for (j = 0; j < N; j++) {
2712 #if !defined(PETSC_USE_COMPLEX)
2713           PetscScalar val = mumps->id.schur[i * N + j];
2714 #else
2715           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2716 #endif
2717           array[j * N + i] = val;
2718         }
2719       }
2720     } else { /* stored by columns */
2721       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2722     }
2723   } else {                          /* either full or lower-triangular (not packed) */
2724     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2725       PetscInt i, j, N = mumps->id.size_schur;
2726       for (i = 0; i < N; i++) {
2727         for (j = i; j < N; j++) {
2728 #if !defined(PETSC_USE_COMPLEX)
2729           PetscScalar val = mumps->id.schur[i * N + j];
2730 #else
2731           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2732 #endif
2733           array[i * N + j] = array[j * N + i] = val;
2734         }
2735       }
2736     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2737       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2738     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2739       PetscInt i, j, N = mumps->id.size_schur;
2740       for (i = 0; i < N; i++) {
2741         for (j = 0; j < i + 1; j++) {
2742 #if !defined(PETSC_USE_COMPLEX)
2743           PetscScalar val = mumps->id.schur[i * N + j];
2744 #else
2745           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2746 #endif
2747           array[i * N + j] = array[j * N + i] = val;
2748         }
2749       }
2750     }
2751   }
2752   PetscCall(MatDenseRestoreArray(St, &array));
2753   *S = St;
2754   PetscFunctionReturn(PETSC_SUCCESS);
2755 }
2756 
2757 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2758 {
2759   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2760 
2761   PetscFunctionBegin;
2762   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2763     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2764     for (i = 0; i < nICNTL_pre; ++i)
2765       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2766     if (i == nICNTL_pre) {                             /* not already cached */
2767       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2768       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2769       mumps->ICNTL_pre[0]++;
2770     }
2771     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
2772     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2773   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2774   PetscFunctionReturn(PETSC_SUCCESS);
2775 }
2776 
2777 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2778 {
2779   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2780 
2781   PetscFunctionBegin;
2782   if (mumps->id.job == JOB_NULL) {
2783     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2784     *ival = 0;
2785     for (i = 0; i < nICNTL_pre; ++i) {
2786       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2787     }
2788   } else *ival = mumps->id.ICNTL(icntl);
2789   PetscFunctionReturn(PETSC_SUCCESS);
2790 }
2791 
2792 /*@
2793   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2794 
2795   Logically Collective
2796 
2797   Input Parameters:
2798 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2799 . icntl - index of MUMPS parameter array `ICNTL()`
2800 - ival  - value of MUMPS `ICNTL(icntl)`
2801 
2802   Options Database Key:
2803 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
2804 
2805   Level: beginner
2806 
2807   Note:
2808   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
2809 
2810 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2811 @*/
2812 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2813 {
2814   PetscFunctionBegin;
2815   PetscValidType(F, 1);
2816   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2817   PetscValidLogicalCollectiveInt(F, icntl, 2);
2818   PetscValidLogicalCollectiveInt(F, ival, 3);
2819   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2820   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2821   PetscFunctionReturn(PETSC_SUCCESS);
2822 }
2823 
2824 /*@
2825   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2826 
2827   Logically Collective
2828 
2829   Input Parameters:
2830 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2831 - icntl - index of MUMPS parameter array ICNTL()
2832 
2833   Output Parameter:
2834 . ival - value of MUMPS ICNTL(icntl)
2835 
2836   Level: beginner
2837 
2838 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2839 @*/
2840 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2841 {
2842   PetscFunctionBegin;
2843   PetscValidType(F, 1);
2844   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2845   PetscValidLogicalCollectiveInt(F, icntl, 2);
2846   PetscAssertPointer(ival, 3);
2847   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2848   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2849   PetscFunctionReturn(PETSC_SUCCESS);
2850 }
2851 
2852 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2853 {
2854   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2855 
2856   PetscFunctionBegin;
2857   if (mumps->id.job == JOB_NULL) {
2858     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2859     for (i = 0; i < nCNTL_pre; ++i)
2860       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2861     if (i == nCNTL_pre) {
2862       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2863       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2864       mumps->CNTL_pre[0]++;
2865     }
2866     mumps->CNTL_pre[1 + 2 * i] = icntl;
2867     mumps->CNTL_pre[2 + 2 * i] = val;
2868   } else mumps->id.CNTL(icntl) = val;
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2873 {
2874   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2875 
2876   PetscFunctionBegin;
2877   if (mumps->id.job == JOB_NULL) {
2878     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2879     *val = 0.0;
2880     for (i = 0; i < nCNTL_pre; ++i) {
2881       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2882     }
2883   } else *val = mumps->id.CNTL(icntl);
2884   PetscFunctionReturn(PETSC_SUCCESS);
2885 }
2886 
2887 /*@
2888   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2889 
2890   Logically Collective
2891 
2892   Input Parameters:
2893 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2894 . icntl - index of MUMPS parameter array `CNTL()`
2895 - val   - value of MUMPS `CNTL(icntl)`
2896 
2897   Options Database Key:
2898 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2899 
2900   Level: beginner
2901 
2902   Note:
2903   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
2904 
2905 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2906 @*/
2907 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2908 {
2909   PetscFunctionBegin;
2910   PetscValidType(F, 1);
2911   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2912   PetscValidLogicalCollectiveInt(F, icntl, 2);
2913   PetscValidLogicalCollectiveReal(F, val, 3);
2914   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2915   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2916   PetscFunctionReturn(PETSC_SUCCESS);
2917 }
2918 
2919 /*@
2920   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2921 
2922   Logically Collective
2923 
2924   Input Parameters:
2925 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2926 - icntl - index of MUMPS parameter array CNTL()
2927 
2928   Output Parameter:
2929 . val - value of MUMPS CNTL(icntl)
2930 
2931   Level: beginner
2932 
2933 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2934 @*/
2935 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2936 {
2937   PetscFunctionBegin;
2938   PetscValidType(F, 1);
2939   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2940   PetscValidLogicalCollectiveInt(F, icntl, 2);
2941   PetscAssertPointer(val, 3);
2942   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2943   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2944   PetscFunctionReturn(PETSC_SUCCESS);
2945 }
2946 
2947 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2948 {
2949   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2950 
2951   PetscFunctionBegin;
2952   *info = mumps->id.INFO(icntl);
2953   PetscFunctionReturn(PETSC_SUCCESS);
2954 }
2955 
2956 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2957 {
2958   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2959 
2960   PetscFunctionBegin;
2961   *infog = mumps->id.INFOG(icntl);
2962   PetscFunctionReturn(PETSC_SUCCESS);
2963 }
2964 
2965 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2966 {
2967   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2968 
2969   PetscFunctionBegin;
2970   *rinfo = mumps->id.RINFO(icntl);
2971   PetscFunctionReturn(PETSC_SUCCESS);
2972 }
2973 
2974 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2975 {
2976   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2977 
2978   PetscFunctionBegin;
2979   *rinfog = mumps->id.RINFOG(icntl);
2980   PetscFunctionReturn(PETSC_SUCCESS);
2981 }
2982 
2983 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2984 {
2985   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2986 
2987   PetscFunctionBegin;
2988   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2989   *size  = 0;
2990   *array = NULL;
2991   if (!mumps->myid) {
2992     *size = mumps->id.INFOG(28);
2993     PetscCall(PetscMalloc1(*size, array));
2994     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2995   }
2996   PetscFunctionReturn(PETSC_SUCCESS);
2997 }
2998 
2999 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3000 {
3001   Mat          Bt = NULL, Btseq = NULL;
3002   PetscBool    flg;
3003   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3004   PetscScalar *aa;
3005   PetscInt     spnr, *ia, *ja, M, nrhs;
3006 
3007   PetscFunctionBegin;
3008   PetscAssertPointer(spRHS, 2);
3009   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3010   if (flg) {
3011     PetscCall(MatTransposeGetMat(spRHS, &Bt));
3012   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
3013 
3014   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3015 
3016   if (mumps->petsc_size > 1) {
3017     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3018     Btseq         = b->A;
3019   } else {
3020     Btseq = Bt;
3021   }
3022 
3023   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3024   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3025   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3026   mumps->id.rhs = NULL;
3027 
3028   if (!mumps->myid) {
3029     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3030     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3031     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3032     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3033     mumps->id.rhs_sparse = (MumpsScalar *)aa;
3034   } else {
3035     mumps->id.irhs_ptr    = NULL;
3036     mumps->id.irhs_sparse = NULL;
3037     mumps->id.nz_rhs      = 0;
3038     mumps->id.rhs_sparse  = NULL;
3039   }
3040   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3041   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3042 
3043   /* solve phase */
3044   mumps->id.job = JOB_SOLVE;
3045   PetscMUMPS_c(mumps);
3046   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3047 
3048   if (!mumps->myid) {
3049     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3050     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3051     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3052   }
3053   PetscFunctionReturn(PETSC_SUCCESS);
3054 }
3055 
3056 /*@
3057   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3058 
3059   Logically Collective
3060 
3061   Input Parameter:
3062 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3063 
3064   Output Parameter:
3065 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3066 
3067   Level: beginner
3068 
3069 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3070 @*/
3071 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3072 {
3073   PetscFunctionBegin;
3074   PetscValidType(F, 1);
3075   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3076   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3077   PetscFunctionReturn(PETSC_SUCCESS);
3078 }
3079 
3080 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3081 {
3082   Mat spRHS;
3083 
3084   PetscFunctionBegin;
3085   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3086   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3087   PetscCall(MatDestroy(&spRHS));
3088   PetscFunctionReturn(PETSC_SUCCESS);
3089 }
3090 
3091 /*@
3092   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>
3093 
3094   Logically Collective
3095 
3096   Input Parameter:
3097 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3098 
3099   Output Parameter:
3100 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3101 
3102   Level: beginner
3103 
3104 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3105 @*/
3106 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3107 {
3108   PetscBool flg;
3109 
3110   PetscFunctionBegin;
3111   PetscValidType(F, 1);
3112   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3113   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3114   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3115 
3116   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3117   PetscFunctionReturn(PETSC_SUCCESS);
3118 }
3119 
3120 /*@
3121   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3122 
3123   Logically Collective
3124 
3125   Input Parameters:
3126 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3127 - icntl - index of MUMPS parameter array INFO()
3128 
3129   Output Parameter:
3130 . ival - value of MUMPS INFO(icntl)
3131 
3132   Level: beginner
3133 
3134 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3135 @*/
3136 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3137 {
3138   PetscFunctionBegin;
3139   PetscValidType(F, 1);
3140   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3141   PetscAssertPointer(ival, 3);
3142   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3143   PetscFunctionReturn(PETSC_SUCCESS);
3144 }
3145 
3146 /*@
3147   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3148 
3149   Logically Collective
3150 
3151   Input Parameters:
3152 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3153 - icntl - index of MUMPS parameter array INFOG()
3154 
3155   Output Parameter:
3156 . ival - value of MUMPS INFOG(icntl)
3157 
3158   Level: beginner
3159 
3160 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3161 @*/
3162 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3163 {
3164   PetscFunctionBegin;
3165   PetscValidType(F, 1);
3166   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3167   PetscAssertPointer(ival, 3);
3168   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3169   PetscFunctionReturn(PETSC_SUCCESS);
3170 }
3171 
3172 /*@
3173   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3174 
3175   Logically Collective
3176 
3177   Input Parameters:
3178 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3179 - icntl - index of MUMPS parameter array RINFO()
3180 
3181   Output Parameter:
3182 . val - value of MUMPS RINFO(icntl)
3183 
3184   Level: beginner
3185 
3186 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3187 @*/
3188 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3189 {
3190   PetscFunctionBegin;
3191   PetscValidType(F, 1);
3192   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3193   PetscAssertPointer(val, 3);
3194   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3195   PetscFunctionReturn(PETSC_SUCCESS);
3196 }
3197 
3198 /*@
3199   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3200 
3201   Logically Collective
3202 
3203   Input Parameters:
3204 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3205 - icntl - index of MUMPS parameter array RINFOG()
3206 
3207   Output Parameter:
3208 . val - value of MUMPS RINFOG(icntl)
3209 
3210   Level: beginner
3211 
3212 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3213 @*/
3214 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3215 {
3216   PetscFunctionBegin;
3217   PetscValidType(F, 1);
3218   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3219   PetscAssertPointer(val, 3);
3220   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3221   PetscFunctionReturn(PETSC_SUCCESS);
3222 }
3223 
3224 /*@
3225   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3226 
3227   Logically Collective
3228 
3229   Input Parameter:
3230 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3231 
3232   Output Parameters:
3233 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3234 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3235           for freeing this array.
3236 
3237   Level: beginner
3238 
3239 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3240 @*/
3241 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3242 {
3243   PetscFunctionBegin;
3244   PetscValidType(F, 1);
3245   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3246   PetscAssertPointer(size, 2);
3247   PetscAssertPointer(array, 3);
3248   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3249   PetscFunctionReturn(PETSC_SUCCESS);
3250 }
3251 
3252 /*MC
3253   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3254   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3255 
3256   Works with `MATAIJ` and `MATSBAIJ` matrices
3257 
3258   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3259 
3260   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3261   See details below.
3262 
3263   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3264 
3265   Options Database Keys:
3266 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3267 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3268 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3269 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3270 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3271 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3272                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3273 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3274 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3275 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3276 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3277 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3278 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3279 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3280 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3281 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3282 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3283 .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3284 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3285 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3286 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
3287 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3288 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3289 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3290 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3291 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3292 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3293 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3294 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3295 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3296 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3297 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3298 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3299 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3300 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3301 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3302 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3303 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3304 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3305                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3306 
3307   Level: beginner
3308 
3309   Notes:
3310   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3311   error if the matrix is Hermitian.
3312 
3313   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3314   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3315 
3316   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3317   the failure with
3318 .vb
3319           KSPGetPC(ksp,&pc);
3320           PCFactorGetMatrix(pc,&mat);
3321           MatMumpsGetInfo(mat,....);
3322           MatMumpsGetInfog(mat,....); etc.
3323 .ve
3324   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3325 
3326   MUMPS provides 64-bit integer support in two build modes:
3327   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3328   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3329 
3330   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3331   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3332   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3333   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3334 
3335   With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3336 
3337   Two modes to run MUMPS/PETSc with OpenMP
3338 .vb
3339    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3340    threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test".
3341 .ve
3342 
3343 .vb
3344    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3345    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3346 .ve
3347 
3348    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3349    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3350    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3351    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3352    (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided).
3353 
3354    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3355    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3356    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3357    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3358    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3359    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3360    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3361    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3362    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3363    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3364    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3365    examine the mapping result.
3366 
3367    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3368    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3369    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3370 
3371    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3372 
3373 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3374 M*/
3375 
3376 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3377 {
3378   PetscFunctionBegin;
3379   *type = MATSOLVERMUMPS;
3380   PetscFunctionReturn(PETSC_SUCCESS);
3381 }
3382 
3383 /* MatGetFactor for Seq and MPI AIJ matrices */
3384 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3385 {
3386   Mat         B;
3387   Mat_MUMPS  *mumps;
3388   PetscBool   isSeqAIJ, isDiag, isDense;
3389   PetscMPIInt size;
3390 
3391   PetscFunctionBegin;
3392 #if defined(PETSC_USE_COMPLEX)
3393   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3394     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3395     *F = NULL;
3396     PetscFunctionReturn(PETSC_SUCCESS);
3397   }
3398 #endif
3399   /* Create the factorization matrix */
3400   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3401   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3402   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3403   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3404   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3405   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3406   PetscCall(MatSetUp(B));
3407 
3408   PetscCall(PetscNew(&mumps));
3409 
3410   B->ops->view    = MatView_MUMPS;
3411   B->ops->getinfo = MatGetInfo_MUMPS;
3412 
3413   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3414   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3415   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3416   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3417   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3418   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3419   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3420   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3421   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3422   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3423   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3424   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3425   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3426   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3427 
3428   if (ftype == MAT_FACTOR_LU) {
3429     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3430     B->factortype            = MAT_FACTOR_LU;
3431     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3432     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3433     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3434     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3435     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3436     mumps->sym = 0;
3437   } else {
3438     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3439     B->factortype                  = MAT_FACTOR_CHOLESKY;
3440     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3441     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3442     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3443     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3444     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3445 #if defined(PETSC_USE_COMPLEX)
3446     mumps->sym = 2;
3447 #else
3448     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3449     else mumps->sym = 2;
3450 #endif
3451   }
3452 
3453   /* set solvertype */
3454   PetscCall(PetscFree(B->solvertype));
3455   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3456   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3457   if (size == 1) {
3458     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3459     B->canuseordering = PETSC_TRUE;
3460   }
3461   B->ops->destroy = MatDestroy_MUMPS;
3462   B->data         = (void *)mumps;
3463 
3464   *F               = B;
3465   mumps->id.job    = JOB_NULL;
3466   mumps->ICNTL_pre = NULL;
3467   mumps->CNTL_pre  = NULL;
3468   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3469   PetscFunctionReturn(PETSC_SUCCESS);
3470 }
3471 
3472 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3473 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3474 {
3475   Mat         B;
3476   Mat_MUMPS  *mumps;
3477   PetscBool   isSeqSBAIJ;
3478   PetscMPIInt size;
3479 
3480   PetscFunctionBegin;
3481 #if defined(PETSC_USE_COMPLEX)
3482   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3483     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3484     *F = NULL;
3485     PetscFunctionReturn(PETSC_SUCCESS);
3486   }
3487 #endif
3488   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3489   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3490   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3491   PetscCall(MatSetUp(B));
3492 
3493   PetscCall(PetscNew(&mumps));
3494   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3495   if (isSeqSBAIJ) {
3496     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3497   } else {
3498     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3499   }
3500 
3501   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3502   B->ops->view                   = MatView_MUMPS;
3503   B->ops->getinfo                = MatGetInfo_MUMPS;
3504 
3505   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3506   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3507   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3508   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3509   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3510   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3511   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3512   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3513   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3514   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3515   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3516   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3517   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3518   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3519 
3520   B->factortype = MAT_FACTOR_CHOLESKY;
3521 #if defined(PETSC_USE_COMPLEX)
3522   mumps->sym = 2;
3523 #else
3524   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3525   else mumps->sym = 2;
3526 #endif
3527 
3528   /* set solvertype */
3529   PetscCall(PetscFree(B->solvertype));
3530   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3531   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3532   if (size == 1) {
3533     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3534     B->canuseordering = PETSC_TRUE;
3535   }
3536   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3537   B->ops->destroy = MatDestroy_MUMPS;
3538   B->data         = (void *)mumps;
3539 
3540   *F               = B;
3541   mumps->id.job    = JOB_NULL;
3542   mumps->ICNTL_pre = NULL;
3543   mumps->CNTL_pre  = NULL;
3544   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3545   PetscFunctionReturn(PETSC_SUCCESS);
3546 }
3547 
3548 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3549 {
3550   Mat         B;
3551   Mat_MUMPS  *mumps;
3552   PetscBool   isSeqBAIJ;
3553   PetscMPIInt size;
3554 
3555   PetscFunctionBegin;
3556   /* Create the factorization matrix */
3557   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3558   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3559   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3560   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3561   PetscCall(MatSetUp(B));
3562 
3563   PetscCall(PetscNew(&mumps));
3564   if (ftype == MAT_FACTOR_LU) {
3565     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3566     B->factortype            = MAT_FACTOR_LU;
3567     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3568     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3569     mumps->sym = 0;
3570     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3571   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3572 
3573   B->ops->view    = MatView_MUMPS;
3574   B->ops->getinfo = MatGetInfo_MUMPS;
3575 
3576   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3578   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3579   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3580   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3581   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3582   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3583   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3584   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3585   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3586   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3587   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3588   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3589   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3590 
3591   /* set solvertype */
3592   PetscCall(PetscFree(B->solvertype));
3593   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3594   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3595   if (size == 1) {
3596     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3597     B->canuseordering = PETSC_TRUE;
3598   }
3599   B->ops->destroy = MatDestroy_MUMPS;
3600   B->data         = (void *)mumps;
3601 
3602   *F               = B;
3603   mumps->id.job    = JOB_NULL;
3604   mumps->ICNTL_pre = NULL;
3605   mumps->CNTL_pre  = NULL;
3606   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3607   PetscFunctionReturn(PETSC_SUCCESS);
3608 }
3609 
3610 /* MatGetFactor for Seq and MPI SELL matrices */
3611 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3612 {
3613   Mat         B;
3614   Mat_MUMPS  *mumps;
3615   PetscBool   isSeqSELL;
3616   PetscMPIInt size;
3617 
3618   PetscFunctionBegin;
3619   /* Create the factorization matrix */
3620   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3621   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3622   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3623   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3624   PetscCall(MatSetUp(B));
3625 
3626   PetscCall(PetscNew(&mumps));
3627 
3628   B->ops->view    = MatView_MUMPS;
3629   B->ops->getinfo = MatGetInfo_MUMPS;
3630 
3631   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3632   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3633   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3634   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3635   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3636   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3637   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3638   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3639   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3640   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3641   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3642   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3643 
3644   if (ftype == MAT_FACTOR_LU) {
3645     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3646     B->factortype            = MAT_FACTOR_LU;
3647     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3648     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3649     mumps->sym = 0;
3650     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3651   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3652 
3653   /* set solvertype */
3654   PetscCall(PetscFree(B->solvertype));
3655   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3656   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3657   if (size == 1) {
3658     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3659     B->canuseordering = PETSC_TRUE;
3660   }
3661   B->ops->destroy = MatDestroy_MUMPS;
3662   B->data         = (void *)mumps;
3663 
3664   *F               = B;
3665   mumps->id.job    = JOB_NULL;
3666   mumps->ICNTL_pre = NULL;
3667   mumps->CNTL_pre  = NULL;
3668   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3669   PetscFunctionReturn(PETSC_SUCCESS);
3670 }
3671 
3672 /* MatGetFactor for MATNEST matrices */
3673 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3674 {
3675   Mat         B, **mats;
3676   Mat_MUMPS  *mumps;
3677   PetscInt    nr, nc;
3678   PetscMPIInt size;
3679   PetscBool   flg = PETSC_TRUE;
3680 
3681   PetscFunctionBegin;
3682 #if defined(PETSC_USE_COMPLEX)
3683   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3684     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3685     *F = NULL;
3686     PetscFunctionReturn(PETSC_SUCCESS);
3687   }
3688 #endif
3689 
3690   /* Return if some condition is not satisfied */
3691   *F = NULL;
3692   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3693   if (ftype == MAT_FACTOR_CHOLESKY) {
3694     IS       *rows, *cols;
3695     PetscInt *m, *M;
3696 
3697     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3698     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3699     PetscCall(MatNestGetISs(A, rows, cols));
3700     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3701     if (!flg) {
3702       PetscCall(PetscFree2(rows, cols));
3703       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3704       PetscFunctionReturn(PETSC_SUCCESS);
3705     }
3706     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3707     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3708     for (PetscInt r = 0; flg && r < nr; r++)
3709       for (PetscInt k = r + 1; flg && k < nr; k++)
3710         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3711     PetscCall(PetscFree2(m, M));
3712     PetscCall(PetscFree2(rows, cols));
3713     if (!flg) {
3714       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3715       PetscFunctionReturn(PETSC_SUCCESS);
3716     }
3717   }
3718 
3719   for (PetscInt r = 0; r < nr; r++) {
3720     for (PetscInt c = 0; c < nc; c++) {
3721       Mat       sub = mats[r][c];
3722       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;
3723 
3724       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3725       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3726       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3727       else {
3728         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3729         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3730       }
3731       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3732       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3733       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3734       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3735       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3736       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3737       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3738       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3739       if (ftype == MAT_FACTOR_CHOLESKY) {
3740         if (r == c) {
3741           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
3742             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3743             flg = PETSC_FALSE;
3744           }
3745         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3746           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3747           flg = PETSC_FALSE;
3748         }
3749       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3750         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3751         flg = PETSC_FALSE;
3752       }
3753     }
3754   }
3755   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3756 
3757   /* Create the factorization matrix */
3758   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3759   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3760   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3761   PetscCall(MatSetUp(B));
3762 
3763   PetscCall(PetscNew(&mumps));
3764 
3765   B->ops->view    = MatView_MUMPS;
3766   B->ops->getinfo = MatGetInfo_MUMPS;
3767 
3768   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3769   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3770   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3771   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3772   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3773   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3774   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3775   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3776   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3777   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3778   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3779   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3780   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3781   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3782 
3783   if (ftype == MAT_FACTOR_LU) {
3784     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3785     B->factortype            = MAT_FACTOR_LU;
3786     mumps->sym               = 0;
3787   } else {
3788     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3789     B->factortype                  = MAT_FACTOR_CHOLESKY;
3790 #if defined(PETSC_USE_COMPLEX)
3791     mumps->sym = 2;
3792 #else
3793     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3794     else mumps->sym = 2;
3795 #endif
3796   }
3797   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3798   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3799 
3800   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3801   if (size == 1) {
3802     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3803     B->canuseordering = PETSC_TRUE;
3804   }
3805 
3806   /* set solvertype */
3807   PetscCall(PetscFree(B->solvertype));
3808   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3809   B->ops->destroy = MatDestroy_MUMPS;
3810   B->data         = (void *)mumps;
3811 
3812   *F               = B;
3813   mumps->id.job    = JOB_NULL;
3814   mumps->ICNTL_pre = NULL;
3815   mumps->CNTL_pre  = NULL;
3816   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3817   PetscFunctionReturn(PETSC_SUCCESS);
3818 }
3819 
3820 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3821 {
3822   PetscFunctionBegin;
3823   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3824   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3825   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3826   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3827   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3828   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3829   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3830   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3831   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3832   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3833   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3834   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3835   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3836   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3837   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3838   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3839   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3840   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3841   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3842   PetscFunctionReturn(PETSC_SUCCESS);
3843 }
3844