xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 3d1372b23971def3aed7e3dc12090948688700a0)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 #include <petsc/private/vecimpl.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15   #include <cmumps_c.h>
16   #include <zmumps_c.h>
17   #include <smumps_c.h>
18   #include <dmumps_c.h>
19 #else
20   #if defined(PETSC_USE_COMPLEX)
21     #if defined(PETSC_USE_REAL_SINGLE)
22       #include <cmumps_c.h>
23       #define MUMPS_c     cmumps_c
24       #define MumpsScalar CMUMPS_COMPLEX
25     #else
26       #include <zmumps_c.h>
27       #define MUMPS_c     zmumps_c
28       #define MumpsScalar ZMUMPS_COMPLEX
29     #endif
30   #else
31     #if defined(PETSC_USE_REAL_SINGLE)
32       #include <smumps_c.h>
33       #define MUMPS_c     smumps_c
34       #define MumpsScalar SMUMPS_REAL
35     #else
36       #include <dmumps_c.h>
37       #define MUMPS_c     dmumps_c
38       #define MumpsScalar DMUMPS_REAL
39     #endif
40   #endif
41 #endif
42 #if defined(PETSC_USE_COMPLEX)
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_STRUC_C CMUMPS_STRUC_C
45   #else
46     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47   #endif
48 #else
49   #if defined(PETSC_USE_REAL_SINGLE)
50     #define MUMPS_STRUC_C SMUMPS_STRUC_C
51   #else
52     #define MUMPS_STRUC_C DMUMPS_STRUC_C
53   #endif
54 #endif
55 EXTERN_C_END
56 
57 #define JOB_INIT         -1
58 #define JOB_NULL         0
59 #define JOB_FACTSYMBOLIC 1
60 #define JOB_FACTNUMERIC  2
61 #define JOB_SOLVE        3
62 #define JOB_END          -2
63 
64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66    naming convention in PetscMPIInt, PetscBLASInt etc.
67 */
68 typedef MUMPS_INT PetscMUMPSInt;
69 
70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
72     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73   #endif
74 #else
75   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
76     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77   #endif
78 #endif
79 
80 #define MPIU_MUMPSINT       MPI_INT
81 #define PETSC_MUMPS_INT_MAX 2147483647
82 #define PETSC_MUMPS_INT_MIN -2147483648
83 
84 /* Cast a PetscCount (or PetscInt) to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
86 {
87   PetscFunctionBegin;
88 #if PetscDefined(USE_64BIT_INDICES)
89   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
90 #endif
91   *b = (PetscMUMPSInt)a;
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
95 /* Put these utility routines here since they are only used in this file */
96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97 {
98   PetscInt  myval;
99   PetscBool myset;
100 
101   PetscFunctionBegin;
102   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
103   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105   if (set) *set = myset;
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
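// Illustrative use (a sketch; icntl and flg are hypothetical locals): inside a PetscOptionsBegin()/PetscOptionsEnd()
// block, where PetscOptionsObject is in scope, an ICNTL option can be read as
//   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
// The value is range-checked against PETSC_MUMPS_INT_MIN/MAX and cast with PetscMUMPSIntCast().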
109 
110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111 //
112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which may seem expensive. But note that the large fixed-size
114 // arrays within the types are directly linked. In the end, we only need to copy ~20 integers/pointers, which is cheap. See PreMumpsCall()/PostMumpsCall().
115 //
116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118 // Also note that we added some *_len fields, not present in the specific types, to track the sizes of those MumpsScalar buffers.
119 typedef struct {
120   PetscPrecision precision;   // precision used by MUMPS
121   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
122 
123   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124   MUMPS_INT  sym, par, job;
125   MUMPS_INT  comm_fortran; /* Fortran communicator */
126   MUMPS_INT *icntl;
127   void      *cntl; // MumpsReal, fixed size array
128   MUMPS_INT  n;
129   MUMPS_INT  nblk;
130 
131   /* Assembled entry */
132   MUMPS_INT8 nnz;
133   MUMPS_INT *irn;
134   MUMPS_INT *jcn;
135   void      *a; // MumpsScalar, centralized input
136   PetscCount a_len;
137 
138   /* Distributed entry */
139   MUMPS_INT8 nnz_loc;
140   MUMPS_INT *irn_loc;
141   MUMPS_INT *jcn_loc;
142   void      *a_loc; // MumpsScalar, distributed input
143   PetscCount a_loc_len;
144 
145   /* Matrix by blocks */
146   MUMPS_INT *blkptr;
147   MUMPS_INT *blkvar;
148 
149   /* Ordering, if given by user */
150   MUMPS_INT *perm_in;
151 
152   /* RHS, solution, output data and statistics */
153   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
154   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
155 
156   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159   MUMPS_INT  schur_lld;
160   MUMPS_INT *info, *infog;   // fixed size array
161   void      *rinfo, *rinfog; // MumpsReal, fixed size array
162 
163   /* Null space */
164   MUMPS_INT *pivnul_list; // allocated by MUMPS!
165   MUMPS_INT *mapping;     // allocated by MUMPS!
166 
167   /* Schur */
168   MUMPS_INT  size_schur;
169   MUMPS_INT *listvar_schur;
170   void      *schur; // MumpsScalar
171   PetscCount schur_len;
172 
173   /* For out-of-core */
174   char *ooc_tmpdir; // fixed size array
175   char *ooc_prefix; // fixed size array
176 } XMUMPS_STRUC_C;
177 
178 // Note: the fixed-size arrays live inside the inner MUMPS struct; expose them to the outer struct through these pointers
179 #define AllocatInternalID(MUMPS_STRUC_T, outer) \
180   do { \
181     MUMPS_STRUC_T *inner; \
182     PetscCall(PetscNew(&inner)); \
183     outer->icntl      = inner->icntl; \
184     outer->cntl       = inner->cntl; \
185     outer->info       = inner->info; \
186     outer->infog      = inner->infog; \
187     outer->rinfo      = inner->rinfo; \
188     outer->rinfog     = inner->rinfog; \
189     outer->ooc_tmpdir = inner->ooc_tmpdir; \
190     outer->ooc_prefix = inner->ooc_prefix; \
191     /* these three fields should never change after init */ \
192     inner->comm_fortran = outer->comm_fortran; \
193     inner->par          = outer->par; \
194     inner->sym          = outer->sym; \
195     outer->internal_id  = inner; \
196   } while (0)
197 
198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200 {
201   PetscFunctionBegin;
202   outer->precision = precision;
203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204   #if defined(PETSC_USE_COMPLEX)
205   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
206   else AllocatInternalID(ZMUMPS_STRUC_C, outer);
207   #else
208   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
209   else AllocatInternalID(DMUMPS_STRUC_C, outer);
210   #endif
211 #else
212   AllocatInternalID(MUMPS_STRUC_C, outer);
213 #endif
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218   do { \
219     MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220     PetscCall(PetscFree(inner->a)); \
221     PetscCall(PetscFree(inner->a_loc)); \
222     PetscCall(PetscFree(inner->redrhs)); \
223     PetscCall(PetscFree(inner->rhs)); \
224     PetscCall(PetscFree(inner->rhs_sparse)); \
225     PetscCall(PetscFree(inner->rhs_loc)); \
226     PetscCall(PetscFree(inner->sol_loc)); \
227     PetscCall(PetscFree(inner->schur)); \
228   } while (0)
229 
230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231 {
232   PetscFunctionBegin;
233   if (outer->internal_id) { // the inner id may never have been created before we destroy the outer
234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235     const PetscPrecision mumps_precision = outer->precision;
236     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237   #if defined(PETSC_USE_COMPLEX)
238       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240   #else
241       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243   #endif
244     }
245 #endif
246     PetscCall(PetscFree(outer->internal_id));
247   }
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253 // (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they are <*ma> and <*m>)
254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255 //
256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259 //
260 //
261 //   Input parameters:
262 // + convert   - whether to do PetscScalar to MumpsScalar conversion
263 // . n         - length of the PetscScalar array
264 // . pa        - [n], points to the PetscScalar array
265 // . precision - precision of MumpsScalar
266 // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267 // - ma        - on input, an existing MumpsScalar array if any.
268 //
269 //   Output parameters:
270 // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272 //
273 //   Note:
274 //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276 {
277   PetscFunctionBegin;
278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279   const PetscPrecision mumps_precision = precision;
280   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
281   #if defined(PETSC_USE_COMPLEX)
282   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284       if (*m < n) {
285         PetscCall(PetscFree(*ma));
286         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287         *m = n;
288       }
289       if (convert) {
290         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291         for (PetscCount i = 0; i < n; i++) {
292           b[i].r = PetscRealPart(pa[i]);
293           b[i].i = PetscImaginaryPart(pa[i]);
294         }
295       }
296     } else {
297       if (*m < n) {
298         PetscCall(PetscFree(*ma));
299         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300         *m = n;
301       }
302       if (convert) {
303         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304         for (PetscCount i = 0; i < n; i++) {
305           b[i].r = PetscRealPart(pa[i]);
306           b[i].i = PetscImaginaryPart(pa[i]);
307         }
308       }
309     }
310   }
311   #else
312   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314       if (*m < n) {
315         PetscCall(PetscFree(*ma));
316         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317         *m = n;
318       }
319       if (convert) {
320         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322       }
323     } else {
324       if (*m < n) {
325         PetscCall(PetscFree(*ma));
326         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327         *m = n;
328       }
329       if (convert) {
330         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332       }
333     }
334   }
335   #endif
336   else
337 #endif
338   {
339     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340     *ma = (void *)pa;                       // same precision, make them alias
341     *m  = 0;
342   }
343   PetscFunctionReturn(PETSC_SUCCESS);
344 }
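// Typical use (as in MatMumpsHandleSchur_Private() below): after allocating a PetscScalar buffer mumps->redrhs of
// length mumps->sizeredrhs, create/refresh the MumpsScalar companion that MUMPS will see through id.redrhs:
//   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
// With convert = PETSC_FALSE only the buffer is set up; values are converted later with MatMumpsCastPetscScalarArray().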
345 
346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
347 //
348 // 1) If the two types are different, cast array elements.
349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351 {
352   PetscFunctionBegin;
353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354   if (mumps_precision != PETSC_SCALAR_PRECISION) {
355   #if defined(PETSC_USE_COMPLEX)
356     if (mumps_precision == PETSC_PRECISION_SINGLE) {
357       PetscReal         *a = (PetscReal *)pa;
358       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360     } else {
361       PetscReal         *a = (PetscReal *)pa;
362       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364     }
365   #else
366     if (mumps_precision == PETSC_PRECISION_SINGLE) {
367       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369     } else {
370       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372     }
373   #endif
374   } else
375 #endif
376     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381 //
382 // 1) If the two types are different, cast array elements.
383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385 {
386   PetscFunctionBegin;
387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388   if (mumps_precision != PETSC_SCALAR_PRECISION) {
389   #if defined(PETSC_USE_COMPLEX)
390     if (mumps_precision == PETSC_PRECISION_SINGLE) {
391       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392       for (PetscCount i = 0; i < n; i++) {
393         b[i].r = PetscRealPart(pa[i]);
394         b[i].i = PetscImaginaryPart(pa[i]);
395       }
396     } else {
397       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398       for (PetscCount i = 0; i < n; i++) {
399         b[i].r = PetscRealPart(pa[i]);
400         b[i].i = PetscImaginaryPart(pa[i]);
401       }
402     }
403   #else
404     if (mumps_precision == PETSC_PRECISION_SINGLE) {
405       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407     } else {
408       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410     }
411   #endif
412   } else
413 #endif
414     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
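// The two cast routines are typically used in tandem around a Schur-complement solve (see MatMumpsSolveSchur_Private() below):
//   PetscCall(MatMumpsCastMumpsScalarArray(n, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); // MumpsScalar -> PetscScalar
//   /* ... solve with the PetscScalar buffer mumps->redrhs ... */
//   PetscCall(MatMumpsCastPetscScalarArray(n, xarray, mumps->id.precision, mumps->id.redrhs)); // PetscScalar -> MumpsScalar
// where n is mumps->sizeredrhs and xarray is the PetscScalar solution array.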
417 
418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419 {
420   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421 }
422 
423 #define PreMumpsCall(inner, outer, mumpsscalar) \
424   do { \
425     inner->job           = outer->job; \
426     inner->n             = outer->n; \
427     inner->nblk          = outer->nblk; \
428     inner->nnz           = outer->nnz; \
429     inner->irn           = outer->irn; \
430     inner->jcn           = outer->jcn; \
431     inner->a             = (mumpsscalar *)outer->a; \
432     inner->nnz_loc       = outer->nnz_loc; \
433     inner->irn_loc       = outer->irn_loc; \
434     inner->jcn_loc       = outer->jcn_loc; \
435     inner->a_loc         = (mumpsscalar *)outer->a_loc; \
436     inner->blkptr        = outer->blkptr; \
437     inner->blkvar        = outer->blkvar; \
438     inner->perm_in       = outer->perm_in; \
439     inner->rhs           = (mumpsscalar *)outer->rhs; \
440     inner->redrhs        = (mumpsscalar *)outer->redrhs; \
441     inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
442     inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
443     inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
444     inner->irhs_sparse   = outer->irhs_sparse; \
445     inner->irhs_ptr      = outer->irhs_ptr; \
446     inner->isol_loc      = outer->isol_loc; \
447     inner->irhs_loc      = outer->irhs_loc; \
448     inner->nrhs          = outer->nrhs; \
449     inner->lrhs          = outer->lrhs; \
450     inner->lredrhs       = outer->lredrhs; \
451     inner->nz_rhs        = outer->nz_rhs; \
452     inner->lsol_loc      = outer->lsol_loc; \
453     inner->nloc_rhs      = outer->nloc_rhs; \
454     inner->lrhs_loc      = outer->lrhs_loc; \
455     inner->schur_lld     = outer->schur_lld; \
456     inner->size_schur    = outer->size_schur; \
457     inner->listvar_schur = outer->listvar_schur; \
458     inner->schur         = (mumpsscalar *)outer->schur; \
459   } while (0)
460 
461 #define PostMumpsCall(inner, outer) \
462   do { \
463     outer->pivnul_list = inner->pivnul_list; \
464     outer->mapping     = inner->mapping; \
465   } while (0)
466 
467 // Entry point for PETSc to call MUMPS
468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469 {
470   PetscFunctionBegin;
471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472   #if defined(PETSC_USE_COMPLEX)
473   if (outer->precision == PETSC_PRECISION_SINGLE) {
474     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
477     PostMumpsCall(inner, outer);
478   } else {
479     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
482     PostMumpsCall(inner, outer);
483   }
484   #else
485   if (outer->precision == PETSC_PRECISION_SINGLE) {
486     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487     PreMumpsCall(inner, outer, SMUMPS_REAL);
488     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
489     PostMumpsCall(inner, outer);
490   } else {
491     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492     PreMumpsCall(inner, outer, DMUMPS_REAL);
493     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
494     PostMumpsCall(inner, outer);
495   }
496   #endif
497 #else
498   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499   PreMumpsCall(inner, outer, MumpsScalar);
500   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501   PostMumpsCall(inner, outer);
502 #endif
503   PetscFunctionReturn(PETSC_SUCCESS);
504 }
505 
506 /* macros s.t. indices match MUMPS documentation */
507 #define ICNTL(I) icntl[(I) - 1]
508 #define INFOG(I) infog[(I) - 1]
509 #define INFO(I)  info[(I) - 1]
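// For example, mumps->id.ICNTL(24) expands to mumps->id.icntl[23], so the code can use the same 1-based numbering
// as the MUMPS users manual.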
510 
511 // Get a value from a MumpsScalar array, i.e., the <F> field of the MUMPS_STRUC_C struct. The value is convertible to PetscScalar. Note there is no minus 1 on I!
512 #if defined(PETSC_USE_COMPLEX)
513   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514 #else
515   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516 #endif
517 
518 // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519 #define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521 #define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])
522 
523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
524 #define ID_CNTL_SET(ID, I, VAL) \
525   do { \
526     if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527     else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528   } while (0)
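// Illustrative use (a sketch): read and rewrite CNTL(1) without knowing the precision of the inner id:
//   PetscReal v = ID_CNTL_GET(mumps->id, 1);
//   ID_CNTL_SET(mumps->id, 1, v);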
529 
530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
531 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532   #define PetscMUMPS_c(mumps) \
533     do { \
534       if (mumps->use_petsc_omp_support) { \
535         if (mumps->is_omp_master) { \
536           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538           PetscCall(PetscCallMumps_Private(&mumps->id)); \
539           PetscCall(PetscFPTrapPop()); \
540           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541         } \
542         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific  \
544          to each rank and has no easy translation between omp_comm and petsc_comm; of it, only info[1]       \
545          (an error code) really matters to the non-master ranks. See the MUMPS-5.1.2 manual p82.              \
546          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
547       */ \
548         MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553       } else { \
554         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555         PetscCall(PetscCallMumps_Private(&mumps->id)); \
556         PetscCall(PetscFPTrapPop()); \
557       } \
558     } while (0)
559 #else
560   #define PetscMUMPS_c(mumps) \
561     do { \
562       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563       PetscCall(PetscCallMumps_Private(&mumps->id)); \
564       PetscCall(PetscFPTrapPop()); \
565     } while (0)
566 #endif
567 
568 typedef struct Mat_MUMPS Mat_MUMPS;
569 struct Mat_MUMPS {
570   XMUMPS_STRUC_C id;
571 
572   MatStructure   matstruc;
573   PetscMPIInt    myid, petsc_size;
574   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
575   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
576   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
577   PetscMUMPSInt  sym;
578   MPI_Comm       mumps_comm;
579   PetscMUMPSInt *ICNTL_pre;
580   PetscReal     *CNTL_pre;
581   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
582   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
583   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
584   PetscMUMPSInt  ICNTL26;
585   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
586 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587   PetscInt    *rhs_nrow, max_nrhs;
588   PetscMPIInt *rhs_recvcounts, *rhs_disps;
589   PetscScalar *rhs_loc, *rhs_recvbuf;
590 #endif
591   Vec            b_seq, x_seq;
592   PetscInt       ninfo, *info; /* which INFO to display */
593   PetscInt       sizeredrhs;
594   PetscScalar   *schur_sol;
595   PetscInt       schur_sizesol;
596   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
597   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
599   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
600 
601   /* Support for MATNEST */
602   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603   PetscCount  *nest_vals_start;
604   PetscScalar *nest_vals;
605 
606   /* stuff used by the PETSc/MUMPS OpenMP support */
607   PetscBool    use_petsc_omp_support;
608   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller ensuring that blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
609   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
611   PetscMPIInt  tag, omp_comm_size;
612   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
613   MPI_Request *reqs;
614 };
615 
616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617    Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
618  */
619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620 {
621   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
622 
623   PetscFunctionBegin;
624 #if defined(PETSC_USE_64BIT_INDICES)
625   {
626     PetscInt i;
627     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
628       PetscCall(PetscFree(mumps->ia_alloc));
629       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
630       mumps->cur_ilen = nrow + 1;
631     }
632     if (nnz > mumps->cur_jlen) {
633       PetscCall(PetscFree(mumps->ja_alloc));
634       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
635       mumps->cur_jlen = nnz;
636     }
637     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
638     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
639     *ia_mumps = mumps->ia_alloc;
640     *ja_mumps = mumps->ja_alloc;
641   }
642 #else
643   *ia_mumps = ia;
644   *ja_mumps = ja;
645 #endif
646   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
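// Worked example (illustrative): for nrow = 2 with 1-based CSR ia = {1, 3, 4}, ja = {1, 2, 2}, the routine sets
// *nnz_mumps = 3; with 64-bit PetscInt the indices are copied into mumps->ia_alloc/ja_alloc as PetscMUMPSInt,
// otherwise *ia_mumps/*ja_mumps simply alias the input arrays.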
649 
650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651 {
652   PetscFunctionBegin;
653   PetscCall(PetscFree(mumps->id.listvar_schur));
654   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655   PetscCall(PetscFree(mumps->schur_sol));
656   mumps->id.size_schur = 0;
657   mumps->id.schur_lld  = 0;
658   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is not yet built
659   PetscFunctionReturn(PETSC_SUCCESS);
660 }
661 
662 /* solve with rhs in mumps->id.redrhs and return in the same location */
663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
664 {
665   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
666   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
667   MatFactorSchurStatus schurstatus;
668   PetscInt             sizesol;
669   const PetscScalar   *xarray;
670 
671   PetscFunctionBegin;
672   PetscCall(MatFactorFactorizeSchurComplement(F));
673   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
674   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
675 
676   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
677   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
679   PetscCall(MatBindToCPU(B, S->boundtocpu));
680 #endif
681   switch (schurstatus) {
682   case MAT_FACTOR_SCHUR_FACTORED:
683     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
684     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
686     PetscCall(MatBindToCPU(X, S->boundtocpu));
687 #endif
688     if (!mumps->id.ICNTL(9)) { /* transpose solve */
689       PetscCall(MatMatSolveTranspose(S, B, X));
690     } else {
691       PetscCall(MatMatSolve(S, B, X));
692     }
693     break;
694   case MAT_FACTOR_SCHUR_INVERTED:
695     sizesol = mumps->id.nrhs * mumps->id.size_schur;
696     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
697       PetscCall(PetscFree(mumps->schur_sol));
698       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
699       mumps->schur_sizesol = sizesol;
700     }
701     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
702     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
704     PetscCall(MatBindToCPU(X, S->boundtocpu));
705 #endif
706     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
707     if (!mumps->id.ICNTL(9)) { /* transpose solve */
708       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
709     } else {
710       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
711     }
712     PetscCall(MatProductSetFromOptions(X));
713     PetscCall(MatProductSymbolic(X));
714     PetscCall(MatProductNumeric(X));
715 
716     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
717     break;
718   default:
719     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
720   }
721   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
722   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
723   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
724   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
725   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
726   PetscCall(MatDestroy(&B));
727   PetscCall(MatDestroy(&X));
728   PetscFunctionReturn(PETSC_SUCCESS);
729 }
730 
731 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
732 {
733   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
734 
735   PetscFunctionBegin;
736   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
737     PetscFunctionReturn(PETSC_SUCCESS);
738   }
739   if (!expansion) { /* prepare for the condensation step */
740     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
741     /* allocate MUMPS internal array to store reduced right-hand sides */
742     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
743       mumps->id.lredrhs = mumps->id.size_schur;
744       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
745       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
746       PetscCall(PetscFree(mumps->redrhs));
747       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
748       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
749     }
750   } else {                                    /* prepare for the expansion step */
751     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
752     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
753     PetscMUMPS_c(mumps);
754     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
755     /* restore defaults */
756     mumps->id.ICNTL(26) = -1;
757     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
758     if (mumps->id.nrhs > 1) {
759       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
760       PetscCall(PetscFree(mumps->redrhs));
761       mumps->id.redrhs_len = 0;
762       mumps->id.lredrhs    = 0;
763       mumps->sizeredrhs    = 0;
764     }
765   }
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 /*
770   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
771 
772   input:
773     A       - matrix in aij, baij or sbaij format
774     shift   - 0: C style output triple; 1: Fortran style output triple.
775     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776               MAT_REUSE_MATRIX:   only the values in v array are updated
777   output:
778     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779     r, c, v - row and col index, matrix values (matrix triples)
780 
781   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
782   freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
783   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784 
785  */
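/*
  Worked example (illustrative): with shift = 1 (Fortran-style indices), the 2x2 AIJ matrix
     [2 3]
     [0 5]
  (where the zero is not stored) is converted to nnz = 3 triples r = {1, 1, 2}, c = {1, 2, 2}, v = {2, 3, 5}.
*/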
786 
787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788 {
789   const PetscScalar *av;
790   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791   PetscCount         nz, rnz, k;
792   PetscMUMPSInt     *row, *col;
793   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
794 
795   PetscFunctionBegin;
796   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797   if (reuse == MAT_INITIAL_MATRIX) {
798     nz = aa->nz;
799     ai = aa->i;
800     aj = aa->j;
801     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802     for (PetscCount i = k = 0; i < M; i++) {
803       rnz = ai[i + 1] - ai[i];
804       ajj = aj + ai[i];
805       for (PetscCount j = 0; j < rnz; j++) {
806         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808         k++;
809       }
810     }
811     mumps->val = (PetscScalar *)av;
812     mumps->irn = row;
813     mumps->jcn = col;
814     mumps->nnz = nz;
815   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818   PetscFunctionReturn(PETSC_SUCCESS);
819 }
820 
821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
822 {
823   PetscCount     nz, i, j, k, r;
824   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
825   PetscMUMPSInt *row, *col;
826 
827   PetscFunctionBegin;
828   nz = a->sliidx[a->totalslices];
829   if (reuse == MAT_INITIAL_MATRIX) {
830     PetscCall(PetscMalloc2(nz, &row, nz, &col));
831     for (i = k = 0; i < a->totalslices; i++) {
832       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
833     }
834     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
835     mumps->irn = row;
836     mumps->jcn = col;
837     mumps->nnz = nz;
838     mumps->val = a->val;
839   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
840   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845 {
846   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
847   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849   PetscInt        bs;
850   PetscMUMPSInt  *row, *col;
851 
852   PetscFunctionBegin;
853   if (reuse == MAT_INITIAL_MATRIX) {
854     PetscCall(MatGetBlockSize(A, &bs));
855     M  = A->rmap->N / bs;
856     ai = aa->i;
857     aj = aa->j;
858     PetscCall(PetscMalloc2(nz, &row, nz, &col));
859     for (i = 0; i < M; i++) {
860       ajj = aj + ai[i];
861       rnz = ai[i + 1] - ai[i];
862       for (k = 0; k < rnz; k++) {
863         for (j = 0; j < bs; j++) {
864           for (m = 0; m < bs; m++) {
865             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867             idx++;
868           }
869         }
870       }
871     }
872     mumps->irn = row;
873     mumps->jcn = col;
874     mumps->nnz = nz;
875     mumps->val = aa->a;
876   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882 {
883   const PetscInt *ai, *aj, *ajj;
884   PetscInt        bs;
885   PetscCount      nz, rnz, i, j, k, m;
886   PetscMUMPSInt  *row, *col;
887   PetscScalar    *val;
888   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool isset, hermitian;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898 #endif
899   ai = aa->i;
900   aj = aa->j;
901   PetscCall(MatGetBlockSize(A, &bs));
902   if (reuse == MAT_INITIAL_MATRIX) {
903     const PetscCount alloc_size = aa->nz * bs2;
904 
905     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906     if (bs > 1) {
907       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908       mumps->val = mumps->val_alloc;
909     } else {
910       mumps->val = aa->a;
911     }
912     mumps->irn = row;
913     mumps->jcn = col;
914   } else {
915     row = mumps->irn;
916     col = mumps->jcn;
917   }
918   val = mumps->val;
919 
920   nz = 0;
921   if (bs > 1) {
922     for (i = 0; i < mbs; i++) {
923       rnz = ai[i + 1] - ai[i];
924       ajj = aj + ai[i];
925       for (j = 0; j < rnz; j++) {
926         for (k = 0; k < bs; k++) {
927           for (m = 0; m < bs; m++) {
928             if (ajj[j] > i || k >= m) {
929               if (reuse == MAT_INITIAL_MATRIX) {
930                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932               }
933               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934             }
935           }
936         }
937       }
938     }
939   } else if (reuse == MAT_INITIAL_MATRIX) {
940     for (i = 0; i < mbs; i++) {
941       rnz = ai[i + 1] - ai[i];
942       ajj = aj + ai[i];
943       for (j = 0; j < rnz; j++) {
944         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946         nz++;
947       }
948     }
949     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950   } else if (mumps->nest_vals)
951     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958 {
959   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960   PetscCount         nz, rnz, i, j;
961   const PetscScalar *av, *v1;
962   PetscScalar       *val;
963   PetscMUMPSInt     *row, *col;
964   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965   PetscBool          diagDense;
966 #if defined(PETSC_USE_COMPLEX)
967   PetscBool hermitian, isset;
968 #endif
969 
970   PetscFunctionBegin;
971 #if defined(PETSC_USE_COMPLEX)
972   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
974 #endif
975   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976   ai = aa->i;
977   aj = aa->j;
978   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979   if (reuse == MAT_INITIAL_MATRIX) {
980     /* count nz in the upper triangular part of A */
981     nz = 0;
982     if (!diagDense) {
983       for (i = 0; i < M; i++) {
984         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985           for (j = ai[i]; j < ai[i + 1]; j++) {
986             if (aj[j] < i) continue;
987             nz++;
988           }
989         } else {
990           nz += ai[i + 1] - adiag[i];
991         }
992       }
993     } else {
994       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995     }
996     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997     PetscCall(PetscMalloc1(nz, &val));
998     mumps->nnz = nz;
999     mumps->irn = row;
1000     mumps->jcn = col;
1001     mumps->val = mumps->val_alloc = val;
1002 
1003     nz = 0;
1004     if (!diagDense) {
1005       for (i = 0; i < M; i++) {
1006         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007           for (j = ai[i]; j < ai[i + 1]; j++) {
1008             if (aj[j] < i) continue;
1009             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011             val[nz] = av[j];
1012             nz++;
1013           }
1014         } else {
1015           rnz = ai[i + 1] - adiag[i];
1016           ajj = aj + adiag[i];
1017           v1  = av + adiag[i];
1018           for (j = 0; j < rnz; j++) {
1019             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021             val[nz++] = v1[j];
1022           }
1023         }
1024       }
1025     } else {
1026       for (i = 0; i < M; i++) {
1027         rnz = ai[i + 1] - adiag[i];
1028         ajj = aj + adiag[i];
1029         v1  = av + adiag[i];
1030         for (j = 0; j < rnz; j++) {
1031           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033           val[nz++] = v1[j];
1034         }
1035       }
1036     }
1037   } else {
1038     nz  = 0;
1039     val = mumps->val;
1040     if (!diagDense) {
1041       for (i = 0; i < M; i++) {
1042         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043           for (j = ai[i]; j < ai[i + 1]; j++) {
1044             if (aj[j] < i) continue;
1045             val[nz++] = av[j];
1046           }
1047         } else {
1048           rnz = ai[i + 1] - adiag[i];
1049           v1  = av + adiag[i];
1050           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051         }
1052       }
1053     } else {
1054       for (i = 0; i < M; i++) {
1055         rnz = ai[i + 1] - adiag[i];
1056         v1  = av + adiag[i];
1057         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058       }
1059     }
1060   }
1061   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066 {
1067   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068   PetscInt           bs;
1069   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070   PetscMUMPSInt     *row, *col;
1071   const PetscScalar *av, *bv, *v1, *v2;
1072   PetscScalar       *val;
1073   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077 #if defined(PETSC_USE_COMPLEX)
1078   PetscBool hermitian, isset;
1079 #endif
1080 
1081   PetscFunctionBegin;
1082 #if defined(PETSC_USE_COMPLEX)
1083   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1085 #endif
1086   PetscCall(MatGetBlockSize(A, &bs));
1087   rstart = A->rmap->rstart;
1088   ai     = aa->i;
1089   aj     = aa->j;
1090   bi     = bb->i;
1091   bj     = bb->j;
1092   av     = aa->a;
1093   bv     = bb->a;
1094 
1095   garray = mat->garray;
1096 
1097   if (reuse == MAT_INITIAL_MATRIX) {
1098     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100     PetscCall(PetscMalloc1(nz, &val));
1101     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
1102     mumps->irn = row;
1103     mumps->jcn = col;
1104     mumps->val = mumps->val_alloc = val;
1105   } else {
1106     val = mumps->val;
1107   }
1108 
1109   jj   = 0;
1110   irow = rstart;
1111   for (i = 0; i < mbs; i++) {
1112     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113     countA = ai[i + 1] - ai[i];
1114     countB = bi[i + 1] - bi[i];
1115     bjj    = bj + bi[i];
1116     v1     = av + ai[i] * bs2;
1117     v2     = bv + bi[i] * bs2;
1118 
1119     if (bs > 1) {
1120       /* A-part */
1121       for (j = 0; j < countA; j++) {
1122         for (k = 0; k < bs; k++) {
1123           for (m = 0; m < bs; m++) {
1124             if (rstart + ajj[j] * bs > irow || k >= m) {
1125               if (reuse == MAT_INITIAL_MATRIX) {
1126                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128               }
1129               val[jj++] = v1[j * bs2 + m + k * bs];
1130             }
1131           }
1132         }
1133       }
1134 
1135       /* B-part */
1136       for (j = 0; j < countB; j++) {
1137         for (k = 0; k < bs; k++) {
1138           for (m = 0; m < bs; m++) {
1139             if (reuse == MAT_INITIAL_MATRIX) {
1140               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142             }
1143             val[jj++] = v2[j * bs2 + m + k * bs];
1144           }
1145         }
1146       }
1147     } else {
1148       /* A-part */
1149       for (j = 0; j < countA; j++) {
1150         if (reuse == MAT_INITIAL_MATRIX) {
1151           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153         }
1154         val[jj++] = v1[j];
1155       }
1156 
1157       /* B-part */
1158       for (j = 0; j < countB; j++) {
1159         if (reuse == MAT_INITIAL_MATRIX) {
1160           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162         }
1163         val[jj++] = v2[j];
1164       }
1165     }
1166     irow += bs;
1167   }
1168   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173 {
1174   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176   PetscMUMPSInt     *row, *col;
1177   const PetscScalar *av, *bv, *v1, *v2;
1178   PetscScalar       *val;
1179   Mat                Ad, Ao;
1180   Mat_SeqAIJ        *aa;
1181   Mat_SeqAIJ        *bb;
1182 
1183   PetscFunctionBegin;
1184   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1187 
1188   aa = (Mat_SeqAIJ *)Ad->data;
1189   bb = (Mat_SeqAIJ *)Ao->data;
1190   ai = aa->i;
1191   aj = aa->j;
1192   bi = bb->i;
1193   bj = bb->j;
1194 
1195   rstart = A->rmap->rstart;
1196   cstart = A->cmap->rstart;
1197 
1198   if (reuse == MAT_INITIAL_MATRIX) {
1199     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201     PetscCall(PetscMalloc1(nz, &val));
1202     mumps->nnz = nz;
1203     mumps->irn = row;
1204     mumps->jcn = col;
1205     mumps->val = mumps->val_alloc = val;
1206   } else {
1207     val = mumps->val;
1208   }
1209 
1210   jj   = 0;
1211   irow = rstart;
1212   for (i = 0; i < m; i++) {
1213     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1214     countA = ai[i + 1] - ai[i];
1215     countB = bi[i + 1] - bi[i];
1216     bjj    = bj + bi[i];
1217     v1     = av + ai[i];
1218     v2     = bv + bi[i];
1219 
1220     /* A-part */
1221     for (j = 0; j < countA; j++) {
1222       if (reuse == MAT_INITIAL_MATRIX) {
1223         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225       }
1226       val[jj++] = v1[j];
1227     }
1228 
1229     /* B-part */
1230     for (j = 0; j < countB; j++) {
1231       if (reuse == MAT_INITIAL_MATRIX) {
1232         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234       }
1235       val[jj++] = v2[j];
1236     }
1237     irow++;
1238   }
1239   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1245 {
1246   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1247   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1248   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1249   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1250   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1251   const PetscInt     bs2 = mat->bs2;
1252   PetscInt           bs;
1253   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1254   PetscMUMPSInt     *row, *col;
1255   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1256   PetscScalar       *val;
1257 
1258   PetscFunctionBegin;
1259   PetscCall(MatGetBlockSize(A, &bs));
1260   if (reuse == MAT_INITIAL_MATRIX) {
1261     nz = bs2 * (aa->nz + bb->nz);
1262     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1263     PetscCall(PetscMalloc1(nz, &val));
1264     mumps->nnz = nz;
1265     mumps->irn = row;
1266     mumps->jcn = col;
1267     mumps->val = mumps->val_alloc = val;
1268   } else {
1269     val = mumps->val;
1270   }
1271 
1272   jj   = 0;
1273   irow = rstart;
1274   for (i = 0; i < mbs; i++) {
1275     countA = ai[i + 1] - ai[i];
1276     countB = bi[i + 1] - bi[i];
1277     ajj    = aj + ai[i];
1278     bjj    = bj + bi[i];
1279     v1     = av + bs2 * ai[i];
1280     v2     = bv + bs2 * bi[i];
1281 
1282     idx = 0;
1283     /* A-part */
1284     for (k = 0; k < countA; k++) {
1285       for (j = 0; j < bs; j++) {
1286         for (n = 0; n < bs; n++) {
1287           if (reuse == MAT_INITIAL_MATRIX) {
1288             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1289             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1290           }
1291           val[jj++] = v1[idx++];
1292         }
1293       }
1294     }
1295 
1296     idx = 0;
1297     /* B-part */
1298     for (k = 0; k < countB; k++) {
1299       for (j = 0; j < bs; j++) {
1300         for (n = 0; n < bs; n++) {
1301           if (reuse == MAT_INITIAL_MATRIX) {
1302             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1303             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1304           }
1305           val[jj++] = v2[idx++];
1306         }
1307       }
1308     }
1309     irow += bs;
1310   }
1311   PetscFunctionReturn(PETSC_SUCCESS);
1312 }
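/* A sketch of the expansion above (assuming bs = 2): a stored block at block-row i and block-column ajj[k] of the
   diagonal part holds its bs2 = 4 values column by column, so the value at offset j * bs + n within the block is
   emitted with point row irow + n and point column cstart + bs * ajj[k] + j; off-diagonal blocks use
   bs * garray[bjj[k]] + j for the column instead. Every point entry of a stored block is emitted, so sparsity
   inside a block is not exploited. */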
1313 
1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315 {
1316   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318   PetscMUMPSInt     *row, *col;
1319   const PetscScalar *av, *bv, *v1, *v2;
1320   PetscScalar       *val;
1321   Mat                Ad, Ao;
1322   Mat_SeqAIJ        *aa;
1323   Mat_SeqAIJ        *bb;
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscBool hermitian, isset;
1326 #endif
1327 
1328   PetscFunctionBegin;
1329 #if defined(PETSC_USE_COMPLEX)
1330   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1332 #endif
1333   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1336 
1337   aa = (Mat_SeqAIJ *)Ad->data;
1338   bb = (Mat_SeqAIJ *)Ao->data;
1339   ai = aa->i;
1340   aj = aa->j;
1341   bi = bb->i;
1342   bj = bb->j;
1343   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344   rstart = A->rmap->rstart;
1345 
1346   if (reuse == MAT_INITIAL_MATRIX) {
1347     nza = 0; /* num of upper triangular entries in the diagonal block Ad, including diagonals */
1348     nzb = 0; /* num of upper triangular entries in the off-diagonal block Ao */
1349     for (i = 0; i < m; i++) {
1350       nza += (ai[i + 1] - adiag[i]);
1351       countB = bi[i + 1] - bi[i];
1352       bjj    = bj + bi[i];
1353       for (j = 0; j < countB; j++) {
1354         if (garray[bjj[j]] > rstart) nzb++;
1355       }
1356     }
1357 
1358     nz = nza + nzb; /* total nz of the upper triangular part of A */
1359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360     PetscCall(PetscMalloc1(nz, &val));
1361     mumps->nnz = nz;
1362     mumps->irn = row;
1363     mumps->jcn = col;
1364     mumps->val = mumps->val_alloc = val;
1365   } else {
1366     val = mumps->val;
1367   }
1368 
1369   jj   = 0;
1370   irow = rstart;
1371   for (i = 0; i < m; i++) {
1372     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373     v1     = av + adiag[i];
1374     countA = ai[i + 1] - adiag[i];
1375     countB = bi[i + 1] - bi[i];
1376     bjj    = bj + bi[i];
1377     v2     = bv + bi[i];
1378 
1379     /* A-part */
1380     for (j = 0; j < countA; j++) {
1381       if (reuse == MAT_INITIAL_MATRIX) {
1382         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384       }
1385       val[jj++] = v1[j];
1386     }
1387 
1388     /* B-part */
1389     for (j = 0; j < countB; j++) {
1390       if (garray[bjj[j]] > rstart) {
1391         if (reuse == MAT_INITIAL_MATRIX) {
1392           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394         }
1395         val[jj++] = v2[j];
1396       }
1397     }
1398     irow++;
1399   }
1400   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
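/* For reference (an illustration, not code from this file): only the upper triangular part is emitted here, as MUMPS
   requires for symmetric (Cholesky) factorizations. Entries of Ad from the diagonal marker adiag[i] onward have
   global column >= the global row index and are kept, while entries of Ao are kept only when their global column
   garray[bjj[j]] satisfies garray[bjj[j]] > rstart, i.e. lies to the right of this process's diagonal block; the
   strictly lower triangular entries are dropped since MUMPS recovers them from symmetry. */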
1404 
1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406 {
1407   const PetscScalar *av;
1408   const PetscInt     M = A->rmap->n;
1409   PetscCount         i;
1410   PetscMUMPSInt     *row, *col;
1411   Vec                v;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415   PetscCall(VecGetArrayRead(v, &av));
1416   if (reuse == MAT_INITIAL_MATRIX) {
1417     PetscCall(PetscMalloc2(M, &row, M, &col));
1418     for (i = 0; i < M; i++) {
1419       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420       col[i] = row[i];
1421     }
1422     mumps->val = (PetscScalar *)av;
1423     mumps->irn = row;
1424     mumps->jcn = col;
1425     mumps->nnz = M;
1426   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428   PetscCall(VecRestoreArrayRead(v, &av));
1429   PetscFunctionReturn(PETSC_SUCCESS);
1430 }
1431 
1432 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1433 {
1434   PetscScalar   *v;
1435   const PetscInt m = A->rmap->n, N = A->cmap->N;
1436   PetscInt       lda;
1437   PetscCount     i, j;
1438   PetscMUMPSInt *row, *col;
1439 
1440   PetscFunctionBegin;
1441   PetscCall(MatDenseGetArray(A, &v));
1442   PetscCall(MatDenseGetLDA(A, &lda));
1443   if (reuse == MAT_INITIAL_MATRIX) {
1444     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1445     for (i = 0; i < m; i++) {
1446       col[i] = 0;
1447       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1448     }
1449     for (j = 1; j < N; j++) {
1450       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1451       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1452     }
1453     if (lda == m) mumps->val = v;
1454     else {
1455       PetscCall(PetscMalloc1(m * N, &mumps->val));
1456       mumps->val_alloc = mumps->val;
1457       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1458     }
1459     mumps->irn = row;
1460     mumps->jcn = col;
1461     mumps->nnz = m * N;
1462   } else {
1463     if (lda == m && !mumps->nest_vals) mumps->val = v;
1464     else {
1465       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1466     }
1467   }
1468   PetscCall(MatDenseRestoreArray(A, &v));
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
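/* An illustrative note (an assumption about typical usage, not code from this file): when the dense array has a
   leading dimension larger than the local row count, e.g. a column sub-view obtained with MatDenseGetSubMatrix(),
   lda != m and the values are compacted column by column into the contiguous m*N buffer mumps->val above; only
   when lda == m can the MUMPS value array simply alias the PETSc array and avoid the copy. */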
1471 
1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
1474 // and its rows and columns permuted
1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
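// A hedged example of the bookkeeping below: for sub = (alpha * A^H + beta * I) wrapped once more in a plain
// MatCreateTranspose() shell, the loop unwraps both shells and returns the innermost A with *swap flipped twice
// (hence unchanged), *conjugate flipped once, and the MatShell shift/scale of each wrapper accumulated into
// *vshift/*vscale (here vshift = beta, vscale = alpha).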
1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477 {
1478   Mat         A;
1479   PetscScalar s[2];
1480   PetscBool   isTrans, isHTrans, compare;
1481 
1482   PetscFunctionBegin;
1483   do {
1484     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485     if (isTrans) {
1486       PetscCall(MatTransposeGetMat(*sub, &A));
1487       isHTrans = PETSC_FALSE;
1488     } else {
1489       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491     }
1492     compare = (PetscBool)(isTrans || isHTrans);
1493     if (compare) {
1494       if (vshift && vscale) {
1495         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496         if (!*conjugate) {
1497           *vshift += s[0] * *vscale;
1498           *vscale *= s[1];
1499         } else {
1500           *vshift += PetscConj(s[0]) * *vscale;
1501           *vscale *= PetscConj(s[1]);
1502         }
1503       }
1504       if (swap) *swap = (PetscBool)!*swap;
1505       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506       *sub = A;
1507     }
1508   } while (compare);
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1513 {
1514   Mat     **mats;
1515   PetscInt  nr, nc;
1516   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1517 
1518   PetscFunctionBegin;
1519   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1520   if (reuse == MAT_INITIAL_MATRIX) {
1521     PetscMUMPSInt *irns, *jcns;
1522     PetscScalar   *vals;
1523     PetscCount     totnnz, cumnnz, maxnnz;
1524     PetscInt      *pjcns_w, Mbs = 0;
1525     IS            *rows, *cols;
1526     PetscInt     **rows_idx, **cols_idx;
1527 
1528     cumnnz = 0;
1529     maxnnz = 0;
1530     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1531     for (PetscInt r = 0; r < nr; r++) {
1532       for (PetscInt c = 0; c < nc; c++) {
1533         Mat sub = mats[r][c];
1534 
1535         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1536         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1537         if (sub) {
1538           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1539           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1540           MatInfo   info;
1541 
1542           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1543           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1544           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1545           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1546           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1547           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1548           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1549           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1550           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1551 
1552           if (chol) {
1553             if (r == c) {
1554               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1555               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1556               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1557               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1558               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560             } else {
1561               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1562               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1563               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1564               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1565               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1566               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1567             }
1568           } else {
1569             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1570             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1571             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1572             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1573             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1574             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1575           }
1576           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1577           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1578           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1579           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1580           maxnnz = PetscMax(maxnnz, info.nz_used);
1581         }
1582       }
1583     }
1584 
1585     /* Allocate total COO */
1586     totnnz = cumnnz;
1587     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1588     PetscCall(PetscMalloc1(totnnz, &vals));
1589 
1590     /* Handle row and column maps:
1591        we directly map rows and use an SF for the columns */
1592     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1593     PetscCall(MatNestGetISs(A, rows, cols));
1594     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1595     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1596     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1597     else (void)maxnnz;
1598 
1599     cumnnz = 0;
1600     for (PetscInt r = 0; r < nr; r++) {
1601       for (PetscInt c = 0; c < nc; c++) {
1602         Mat             sub    = mats[r][c];
1603         const PetscInt *ridx   = rows_idx[r];
1604         const PetscInt *cidx   = cols_idx[c];
1605         PetscScalar     vscale = 1.0, vshift = 0.0;
1606         PetscInt        rst, size, bs;
1607         PetscSF         csf;
1608         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1609         PetscLayout     cmap;
1610         PetscInt        innz;
1611 
1612         mumps->nest_vals_start[r * nc + c] = cumnnz;
1613         if (c == r) {
1614           PetscCall(ISGetSize(rows[r], &size));
1615           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1616             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1617           }
1618           PetscCall(MatGetBlockSize(sub, &bs));
1619           Mbs += size / bs;
1620         }
1621         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1622 
1623         /* Extract inner blocks if needed */
1624         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1625         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1626 
1627         /* Get column layout to map off-process columns */
1628         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1629 
1630         /* Get row start to map on-process rows */
1631         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1632 
1633         /* Directly use the MUMPS data structure and use C (0-based) ordering for now */
1634         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1635 
1636         /* Swap the role of rows and columns indices for transposed blocks
1637            since we need values with global final ordering */
1638         if (swap) {
1639           cidx = rows_idx[r];
1640           ridx = cols_idx[c];
1641         }
1642 
1643         /* Communicate column indices
1644            This could have been done with a single SF, but that would have complicated the code a lot.
1645            Since we do it only once, we pay the price of setting up an SF for each block */
1646         if (PetscDefined(USE_64BIT_INDICES)) {
1647           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1648         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1649         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1650         PetscCall(PetscIntCast(mumps->nnz, &innz));
1651         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1652         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1654         PetscCall(PetscSFDestroy(&csf));
1655 
1656         /* Import indices: use direct map for rows and mapped indices for columns */
1657         if (swap) {
1658           for (PetscInt k = 0; k < mumps->nnz; k++) {
1659             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1660             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1661           }
1662         } else {
1663           for (PetscInt k = 0; k < mumps->nnz; k++) {
1664             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1665             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1666           }
1667         }
1668 
1669         /* Import values to full COO */
1670         if (conjugate) { /* conjugate the entries */
1671           PetscScalar *v = vals + cumnnz;
1672           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1673         } else if (vscale != 1.0) {
1674           PetscScalar *v = vals + cumnnz;
1675           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1676         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1677 
1678         /* Shift new starting point and sanity check */
1679         cumnnz += mumps->nnz;
1680         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1681 
1682         /* Free scratch memory */
1683         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1684         PetscCall(PetscFree(mumps->val_alloc));
1685         mumps->val = NULL;
1686         mumps->nnz = 0;
1687       }
1688     }
1689     if (mumps->id.ICNTL(15) == 1) {
1690       if (Mbs != A->rmap->N) {
1691         PetscMPIInt rank, size;
1692 
1693         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1694         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1695         if (rank == 0) {
1696           PetscInt shift = 0;
1697 
1698           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1699           PetscCall(PetscFree(mumps->id.blkptr));
1700           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1701           mumps->id.blkptr[0] = 1;
1702           for (PetscInt i = 0; i < size; ++i) {
1703             for (PetscInt r = 0; r < nr; r++) {
1704               Mat             sub = mats[r][r];
1705               const PetscInt *ranges;
1706               PetscInt        bs;
1707 
1708               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1709               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1710               PetscCall(MatGetBlockSize(sub, &bs));
1711               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1712               shift += (ranges[i + 1] - ranges[i]) / bs;
1713             }
1714           }
1715         }
1716       } else mumps->id.ICNTL(15) = 0;
1717     }
1718     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1719     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1720     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1721     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1722     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1723     mumps->nest_vals_start[nr * nc] = cumnnz;
1724 
1725     /* Set pointers for final MUMPS data structure */
1726     mumps->nest_vals = vals;
1727     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1728     mumps->val       = vals;
1729     mumps->irn       = irns;
1730     mumps->jcn       = jcns;
1731     mumps->nnz       = cumnnz;
1732   } else {
1733     PetscScalar *oval = mumps->nest_vals;
1734     for (PetscInt r = 0; r < nr; r++) {
1735       for (PetscInt c = 0; c < nc; c++) {
1736         PetscBool   conjugate = PETSC_FALSE;
1737         Mat         sub       = mats[r][c];
1738         PetscScalar vscale = 1.0, vshift = 0.0;
1739         PetscInt    midx = r * nc + c;
1740 
1741         if (!mumps->nest_convert_to_triples[midx]) continue;
1742         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1743         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1744         mumps->val = oval + mumps->nest_vals_start[midx];
1745         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1746         if (conjugate) {
1747           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1748           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1749         } else if (vscale != 1.0) {
1750           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1751           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1752         }
1753       }
1754     }
1755     mumps->val = oval;
1756   }
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1761 {
1762   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1763 
1764   PetscFunctionBegin;
1765   PetscCall(PetscFree(mumps->id.isol_loc));
1766   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1767   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1768   PetscCall(VecDestroy(&mumps->b_seq));
1769   PetscCall(VecDestroy(&mumps->x_seq));
1770   PetscCall(PetscFree(mumps->id.perm_in));
1771   PetscCall(PetscFree(mumps->id.blkvar));
1772   PetscCall(PetscFree(mumps->id.blkptr));
1773   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1774   PetscCall(PetscFree(mumps->val_alloc));
1775   PetscCall(PetscFree(mumps->info));
1776   PetscCall(PetscFree(mumps->ICNTL_pre));
1777   PetscCall(PetscFree(mumps->CNTL_pre));
1778   PetscCall(MatMumpsResetSchur_Private(mumps));
1779   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1780     mumps->id.job = JOB_END;
1781     PetscMUMPS_c(mumps);
1782     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1783     if (mumps->mumps_comm != MPI_COMM_NULL) {
1784       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1785       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1786     }
1787   }
1788   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1789 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1790   if (mumps->use_petsc_omp_support) {
1791     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1792     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1793     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1794   }
1795 #endif
1796   PetscCall(PetscFree(mumps->ia_alloc));
1797   PetscCall(PetscFree(mumps->ja_alloc));
1798   PetscCall(PetscFree(mumps->recvcount));
1799   PetscCall(PetscFree(mumps->reqs));
1800   PetscCall(PetscFree(mumps->irhs_loc));
1801   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1802   PetscCall(PetscFree(mumps->nest_vals));
1803   PetscCall(PetscFree(A->data));
1804 
1805   /* clear composed functions */
1806   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1807   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1808   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1809   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1810   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1811   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1812   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1813   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1814   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1815   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1816   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1817   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1818   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1819   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1820   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1821   PetscFunctionReturn(PETSC_SUCCESS);
1822 }
1823 
1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local process. */
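/* A sketch of the MUMPS distributed dense RHS layout assumed below (ICNTL(20) = 10): each process passes nloc_rhs
   local rows; irhs_loc[i] holds the 1-based global row index of local row i, and rhs_loc[] stores the nrhs
   right-hand sides column by column with leading dimension lrhs_loc = nloc_rhs, so entry (i, j) sits at
   rhs_loc[i + j * lrhs_loc]. */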
1825 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1826 {
1827   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1828   const PetscMPIInt ompsize = mumps->omp_comm_size;
1829   PetscInt          i, m, M, rstart;
1830 
1831   PetscFunctionBegin;
1832   PetscCall(MatGetSize(A, &M, NULL));
1833   PetscCall(MatGetLocalSize(A, &m, NULL));
1834   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1835   if (ompsize == 1) {
1836     if (!mumps->irhs_loc) {
1837       mumps->nloc_rhs = (PetscMUMPSInt)m;
1838       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1839       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1840       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1841     }
1842     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1843   } else {
1844 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1845     const PetscInt *ranges;
1846     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1847     MPI_Group       petsc_group, omp_group;
1848     PetscScalar    *recvbuf = NULL;
1849 
1850     if (mumps->is_omp_master) {
1851       /* Lazily initialize the omp stuff for distributed rhs */
1852       if (!mumps->irhs_loc) {
1853         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1854         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1855         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1856         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1857         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1858         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1859 
1860         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1861         mumps->nloc_rhs = 0;
1862         PetscCall(MatGetOwnershipRanges(A, &ranges));
1863         for (j = 0; j < ompsize; j++) {
1864           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1865           mumps->nloc_rhs += mumps->rhs_nrow[j];
1866         }
1867         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1868         for (j = k = 0; j < ompsize; j++) {
1869           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1870         }
1871 
1872         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1873         PetscCallMPI(MPI_Group_free(&petsc_group));
1874         PetscCallMPI(MPI_Group_free(&omp_group));
1875       }
1876 
1877       /* Reallocate buffers when the current nrhs is bigger than any previously seen */
1878       if (nrhs > mumps->max_nrhs) {
1879         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1880         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1881         mumps->max_nrhs = nrhs;
1882       }
1883 
1884       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1885       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1886       mumps->rhs_disps[0] = 0;
1887       for (j = 1; j < ompsize; j++) {
1888         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1889         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1890       }
1891       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1892     }
1893 
1894     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1895     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1896 
1897     if (mumps->is_omp_master) {
1898       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1899         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1900         for (j = 0; j < ompsize; j++) {
1901           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1902           dst                    = dstbase;
1903           for (i = 0; i < nrhs; i++) {
1904             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1905             src += mumps->rhs_nrow[j];
1906             dst += mumps->nloc_rhs;
1907           }
1908           dstbase += mumps->rhs_nrow[j];
1909         }
1910       }
1911       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1912     }
1913 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1914   }
1915   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1916   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1917   mumps->id.lrhs_loc = mumps->nloc_rhs;
1918   mumps->id.irhs_loc = mumps->irhs_loc;
1919   PetscFunctionReturn(PETSC_SUCCESS);
1920 }
1921 
1922 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1923 {
1924   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1925   const PetscScalar *barray = NULL;
1926   PetscScalar       *array;
1927   IS                 is_iden, is_petsc;
1928   PetscInt           i;
1929   PetscBool          second_solve = PETSC_FALSE;
1930   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1934                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1935                                    &cite1));
1936   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1937                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1938                                    &cite2));
1939 
1940   PetscCall(VecFlag(x, A->factorerrortype));
1941   if (A->factorerrortype) {
1942     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1943     PetscFunctionReturn(PETSC_SUCCESS);
1944   }
1945 
1946   mumps->id.nrhs = 1;
1947   if (mumps->petsc_size > 1) {
1948     if (mumps->ICNTL20 == 10) {
1949       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1950       PetscCall(VecGetArrayRead(b, &barray));
1951       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1952     } else {
1953       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */
1954       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1955       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1956       if (!mumps->myid) {
1957         PetscCall(VecGetArray(mumps->b_seq, &array));
1958         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959       }
1960     }
1961   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1962     mumps->id.ICNTL(20) = 0;
1963     PetscCall(VecCopy(b, x));
1964     PetscCall(VecGetArray(x, &array));
1965     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1966   }
1967 
1968   /*
1969      Handle the condensation step of the Schur complement (if any).
1970      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user;
1971      according to the MUMPS (5.0.0) manual, any value is harmless during the factorization phase.
1972      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve solve the full system.
1973      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1974   */
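  /* Roughly, the default flow below is (a sketch, assuming the Schur indices were set via MatFactorSetSchurIS()):
       1. ICNTL(26) = 1, JOB_SOLVE      -> condense the RHS onto the Schur variables (id.redrhs)
       2. MatMumpsSolveSchur_Private()  -> solve with the Schur complement S for the interface unknowns
       3. ICNTL(26) = 2, JOB_SOLVE      -> expand back to the full solution (2. and 3. happen inside MatMumpsHandleSchur_Private(A, PETSC_TRUE)) */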
1975   if (mumps->id.size_schur > 0) {
1976     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1977     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1978       second_solve = PETSC_TRUE;
1979       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1980       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1981     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1982   }
1983 
1984   mumps->id.job = JOB_SOLVE;
1985   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1986   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1987 
1988   /* handle expansion step of Schur complement (if any) */
1989   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1990   else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
1991     PetscCall(MatMumpsSolveSchur_Private(A));
1992     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1993   }
1994 
1995   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1996     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1997       /* when id.ICNTL(9) changes, the contents of isol_loc may change (but not its size, lsol_loc), so recreate scat_sol */
1998       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1999     }
2000     if (!mumps->scat_sol) { /* create scatter scat_sol */
2001       PetscInt *isol2_loc = NULL;
2002       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2003       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2004       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
2005       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2006       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2007       PetscCall(ISDestroy(&is_iden));
2008       PetscCall(ISDestroy(&is_petsc));
2009       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2010     }
2011 
2012     PetscScalar *xarray;
2013     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2014     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2015     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2016     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2018 
2019     if (mumps->ICNTL20 == 10) { // distributed RHS
2020       PetscCall(VecRestoreArrayRead(b, &barray));
2021     } else if (!mumps->myid) { // centralized RHS
2022       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2023     }
2024   } else {
2025     // id.rhs has the solution in mumps precision
2026     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2027     PetscCall(VecRestoreArray(x, &array));
2028   }
2029 
2030   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035 {
2036   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2038 
2039   PetscFunctionBegin;
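  /* Per the MUMPS manual, ICNTL(9) = 1 (the default) solves A x = b, while any other value solves A^T x = b with the
     non-conjugated transpose, which is exactly what MatSolveTranspose needs */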
2040   mumps->id.ICNTL(9) = 0;
2041   PetscCall(MatSolve_MUMPS(A, b, x));
2042   mumps->id.ICNTL(9) = value;
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2047 {
2048   Mat                Bt = NULL;
2049   PetscBool          denseX, denseB, flg, flgT;
2050   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2051   PetscInt           i, nrhs, M, nrhsM;
2052   PetscScalar       *array;
2053   const PetscScalar *barray;
2054   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2055   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2056   PetscScalar       *sol_loc;
2057   void              *sol_loc_save;
2058   PetscCount         sol_loc_len_save;
2059   IS                 is_to, is_from;
2060   PetscInt           k, proc, j, m, myrstart;
2061   const PetscInt    *rstart;
2062   Vec                v_mpi, msol_loc;
2063   VecScatter         scat_sol;
2064   Vec                b_seq;
2065   VecScatter         scat_rhs;
2066   PetscScalar       *aa;
2067   PetscInt           spnr, *ia, *ja;
2068   Mat_MPIAIJ        *b = NULL;
2069 
2070   PetscFunctionBegin;
2071   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2072   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2073 
2074   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2075 
2076   if (denseB) {
2077     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2078     mumps->id.ICNTL(20) = 0; /* dense RHS */
2079   } else {                   /* sparse B */
2080     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2081     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2082     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2083     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2084     /* input B is transpose of actual RHS matrix,
2085      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2086     PetscCall(MatTransposeGetMat(B, &Bt));
2087     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2088   }
2089 
2090   PetscCall(MatGetSize(B, &M, &nrhs));
2091   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2092   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2093   mumps->id.lrhs = (PetscMUMPSInt)M;
2094 
2095   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2096     PetscScalar *aa;
2097     PetscInt     spnr, *ia, *ja;
2098     PetscBool    second_solve = PETSC_FALSE;
2099 
2100     PetscCall(MatDenseGetArray(X, &array));
2101     if (denseB) {
2102       /* copy B to X */
2103       PetscCall(MatDenseGetArrayRead(B, &barray));
2104       PetscCall(PetscArraycpy(array, barray, nrhsM));
2105       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2106     } else { /* sparse B */
2107       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2108       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2109       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2110       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2111       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2112     }
2113     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2114 
2115     /* handle condensation step of Schur complement (if any) */
2116     if (mumps->id.size_schur > 0) {
2117       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2118         second_solve = PETSC_TRUE;
2119         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2120         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
2121       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2122     }
2123 
2124     mumps->id.job = JOB_SOLVE;
2125     PetscMUMPS_c(mumps);
2126     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2127 
2128     /* handle expansion step of Schur complement (if any) */
2129     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2130     else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
2131       PetscCall(MatMumpsSolveSchur_Private(A));
2132       for (j = 0; j < nrhs; ++j)
2133         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2134     }
2135 
2136     if (!denseB) { /* sparse B, restore ia, ja */
2137       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2138       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2139       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2140     }
2141 
2142     // whether B is dense or sparse, the solution is in id.rhs; convert it into the array of X
2143     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2144     PetscCall(MatDenseRestoreArray(X, &array));
2145     PetscFunctionReturn(PETSC_SUCCESS);
2146   }
2147 
2148   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2149   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2150 
2151   /* create msol_loc to hold mumps local solution */
2152   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2153   sol_loc_save          = mumps->id.sol_loc;
2154   sol_loc_len_save      = mumps->id.sol_loc_len;
2155   mumps->id.isol_loc    = NULL; // an init state
2156   mumps->id.sol_loc     = NULL;
2157   mumps->id.sol_loc_len = 0;
2158 
2159   lsol_loc = mumps->id.lsol_loc;
2160   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2161   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2162   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2163   mumps->id.isol_loc = isol_loc;
2164 
2165   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2166 
2167   if (denseB) {
2168     if (mumps->ICNTL20 == 10) {
2169       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2170       PetscCall(MatDenseGetArrayRead(B, &barray));
2171       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2172       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2173       PetscCall(MatGetLocalSize(B, &m, NULL));
2174       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2175     } else {
2176       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2177       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as a single MPI_Gather,
2178         resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank
2179         0, then on rank 0 re-arrange B into the desired order, which is a purely local operation.
2180       */
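      /* A possible shape of that optimization (a sketch only, not what is implemented here):
           VecScatter tozero;
           Vec        b0;
           PetscCall(VecScatterCreateToZero(v_mpi, &tozero, &b0)); // v_mpi wraps B and is created below
           PetscCall(VecScatterBegin(tozero, v_mpi, b0, INSERT_VALUES, SCATTER_FORWARD));
           PetscCall(VecScatterEnd(tozero, v_mpi, b0, INSERT_VALUES, SCATTER_FORWARD));
         followed by a purely local permutation on rank 0 of the gathered entries into the per-process, per-rhs
         ordering built below, turning the irregular scatter into a contiguous gather. */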
2181 
2182       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2183       /* wrap dense rhs matrix B into a vector v_mpi */
2184       PetscCall(MatGetLocalSize(B, &m, NULL));
2185       PetscCall(MatDenseGetArrayRead(B, &barray));
2186       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2187       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2188 
2189       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2190       if (!mumps->myid) {
2191         PetscInt *idx;
2192         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2193         PetscCall(PetscMalloc1(nrhsM, &idx));
2194         PetscCall(MatGetOwnershipRanges(B, &rstart));
2195         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2196           for (j = 0; j < nrhs; j++) {
2197             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2198           }
2199         }
2200 
2201         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2202         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2203         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2204       } else {
2205         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2206         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2207         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2208       }
2209 
2210       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2211       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2212       PetscCall(ISDestroy(&is_to));
2213       PetscCall(ISDestroy(&is_from));
2214       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2215 
2216       if (!mumps->myid) { /* define rhs on the host */
2217         PetscCall(VecGetArrayRead(b_seq, &barray));
2218         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2219         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2220       }
2221     }
2222   } else { /* sparse B */
2223     b = (Mat_MPIAIJ *)Bt->data;
2224 
2225     /* wrap dense X into a vector v_mpi */
2226     PetscCall(MatGetLocalSize(X, &m, NULL));
2227     PetscCall(MatDenseGetArrayRead(X, &barray));
2228     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2229     PetscCall(MatDenseRestoreArrayRead(X, &barray));
2230 
2231     if (!mumps->myid) {
2232       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2233       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2234       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2235       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2236       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2237     } else {
2238       mumps->id.irhs_ptr    = NULL;
2239       mumps->id.irhs_sparse = NULL;
2240       mumps->id.nz_rhs      = 0;
2241       if (mumps->id.rhs_sparse_len) {
2242         PetscCall(PetscFree(mumps->id.rhs_sparse));
2243         mumps->id.rhs_sparse_len = 0;
2244       }
2245     }
2246   }
2247 
2248   /* solve phase */
2249   mumps->id.job = JOB_SOLVE;
2250   PetscMUMPS_c(mumps);
2251   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2252 
2253   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2254   PetscCall(MatDenseGetArray(X, &array));
2255   PetscCall(VecPlaceArray(v_mpi, array));
2256 
2257   /* create scatter scat_sol */
2258   PetscCall(MatGetOwnershipRanges(X, &rstart));
2259   /* iidx: index for scatter mumps solution to PETSc X */
2260 
2261   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2262   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2263   for (i = 0; i < lsol_loc; i++) {
2264     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2265 
2266     for (proc = 0; proc < mumps->petsc_size; proc++) {
2267       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2268         myrstart = rstart[proc];
2269         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2270         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2271         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2272         break;
2273       }
2274     }
2275 
2276     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2277   }
2278   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2279   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2280   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2281   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2282   PetscCall(ISDestroy(&is_from));
2283   PetscCall(ISDestroy(&is_to));
2284   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2285   PetscCall(MatDenseRestoreArray(X, &array));
2286 
2287   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2288     mumps->id.sol_loc_len = 0;
2289     PetscCall(PetscFree(mumps->id.sol_loc));
2290   }
2291 
2292   // restore old values
2293   mumps->id.sol_loc     = sol_loc_save;
2294   mumps->id.sol_loc_len = sol_loc_len_save;
2295   mumps->id.isol_loc    = isol_loc_save;
2296 
2297   PetscCall(PetscFree2(sol_loc, isol_loc));
2298   PetscCall(PetscFree(idxx));
2299   PetscCall(VecDestroy(&msol_loc));
2300   PetscCall(VecDestroy(&v_mpi));
2301   if (!denseB) {
2302     if (!mumps->myid) {
2303       b = (Mat_MPIAIJ *)Bt->data;
2304       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2305       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2306       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2307     }
2308   } else {
2309     if (mumps->ICNTL20 == 0) {
2310       PetscCall(VecDestroy(&b_seq));
2311       PetscCall(VecScatterDestroy(&scat_rhs));
2312     }
2313   }
2314   PetscCall(VecScatterDestroy(&scat_sol));
2315   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2316   PetscFunctionReturn(PETSC_SUCCESS);
2317 }
2318 
2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320 {
2321   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2323 
2324   PetscFunctionBegin;
2325   mumps->id.ICNTL(9) = 0;
2326   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327   mumps->id.ICNTL(9) = value;
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332 {
2333   PetscBool flg;
2334   Mat       B;
2335 
2336   PetscFunctionBegin;
2337   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2339 
2340   /* Create B=Bt^T that uses Bt's data structure */
2341   PetscCall(MatCreateTranspose(Bt, &B));
2342 
2343   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344   PetscCall(MatDestroy(&B));
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
2348 #if !defined(PETSC_USE_COMPLEX)
2349 /*
2350   input:
2351    F:        numeric factor
2352   output:
2353    nneg:     total number of negative pivots
2354    nzero:    total number of zero pivots
2355    npos:     (global dimension of F) - nneg - nzero
2356 */
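/* A hedged usage sketch (assuming a MUMPS Cholesky factor obtained with -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1):
     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos)); // dispatches to this routine for MATSOLVERMUMPS factors
   Spectrum-slicing eigensolvers, for example, use these counts to locate eigenvalues relative to a shift. */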
2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358 {
2359   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360   PetscMPIInt size;
2361 
2362   PetscFunctionBegin;
2363   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
2366 
2367   if (nneg) *nneg = mumps->id.INFOG(12);
2368   if (nzero || npos) {
2369     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2370     if (nzero) *nzero = mumps->id.INFOG(28);
2371     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372   }
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 #endif
2376 
2377 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2378 {
2379   PetscMPIInt    nreqs;
2380   PetscMUMPSInt *irn, *jcn;
2381   PetscMPIInt    count;
2382   PetscCount     totnnz, remain;
2383   const PetscInt osize = mumps->omp_comm_size;
2384   PetscScalar   *val;
2385 
2386   PetscFunctionBegin;
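       /* omp_comm_size > 1 only when -mat_mumps_use_omp_threads is active; in that mode only the omp-master ranks
          call MUMPS, so each master must collect the COO triples owned by the other ranks of its omp group */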
2387   if (osize > 1) {
2388     if (reuse == MAT_INITIAL_MATRIX) {
2389       /* master first gathers counts of nonzeros to receive */
2390       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2391       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2392 
2393       /* Then each computes number of send/recvs */
2394       if (mumps->is_omp_master) {
2395         /* Start from 1 since self communication is not done in MPI */
2396         nreqs = 0;
2397         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2398       } else {
2399         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2400       }
2401       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2402 
2403       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
2404          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
2405          might be a prime number > 2^31-1, we have to slice the messages. Note that omp_comm_size
2406          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
2407        */
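           /* For example, a hypothetical worker rank with nnz = 5e9 local entries would send each of irn/jcn/val
              in ceil(5e9 / (2^31-1)) = 3 slices, i.e. 9 point-to-point messages in total */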
2408       nreqs = 0; /* counter for actual send/recvs */
2409       if (mumps->is_omp_master) {
2410         totnnz = 0;
2411 
2412         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2413         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2414         PetscCall(PetscMalloc1(totnnz, &val));
2415 
2416         /* Self communication */
2417         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2418         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2419         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2420 
2421         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2422         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2423         PetscCall(PetscFree(mumps->val_alloc));
2424         mumps->nnz = totnnz;
2425         mumps->irn = irn;
2426         mumps->jcn = jcn;
2427         mumps->val = mumps->val_alloc = val;
2428 
2429         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2430         jcn += mumps->recvcount[0];
2431         val += mumps->recvcount[0];
2432 
2433         /* Remote communication */
2434         for (PetscMPIInt i = 1; i < osize; i++) {
2435           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2436           remain = mumps->recvcount[i] - count;
2437           while (count > 0) {
2438             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2440             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2441             irn += count;
2442             jcn += count;
2443             val += count;
2444             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2445             remain -= count;
2446           }
2447         }
2448       } else {
2449         irn    = mumps->irn;
2450         jcn    = mumps->jcn;
2451         val    = mumps->val;
2452         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453         remain = mumps->nnz - count;
2454         while (count > 0) {
2455           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2457           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2458           irn += count;
2459           jcn += count;
2460           val += count;
2461           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2462           remain -= count;
2463         }
2464       }
2465     } else {
2466       nreqs = 0;
2467       if (mumps->is_omp_master) {
2468         val = mumps->val + mumps->recvcount[0];
2469         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2470           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2471           remain = mumps->recvcount[i] - count;
2472           while (count > 0) {
2473             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2474             val += count;
2475             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2476             remain -= count;
2477           }
2478         }
2479       } else {
2480         val    = mumps->val;
2481         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2482         remain = mumps->nnz - count;
2483         while (count > 0) {
2484           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2485           val += count;
2486           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2487           remain -= count;
2488         }
2489       }
2490     }
2491     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2492     mumps->tag++; /* It is totally fine for the above send/recvs to share one MPI tag */
2493   }
2494   PetscFunctionReturn(PETSC_SUCCESS);
2495 }
2496 
2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498 {
2499   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2500 
2501   PetscFunctionBegin;
2502   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
2503     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505     PetscFunctionReturn(PETSC_SUCCESS);
2506   }
2507 
2508   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2510 
2511   /* numerical factorization phase */
2512   mumps->id.job = JOB_FACTNUMERIC;
2513   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515   } else {
2516     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517   }
2518 
2519   if (F->schur) {
2520     const PetscScalar *array;
2521     MUMPS_INT          size = mumps->id.size_schur;
2522     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525   }
2526 
2527   PetscMUMPS_c(mumps);
2528   if (mumps->id.INFOG(1) < 0) {
2529     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2530     if (mumps->id.INFOG(1) == -10) {
2531       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2533     } else if (mumps->id.INFOG(1) == -13) {
2534       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2535       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2536     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2537       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2538       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2539     } else {
2540       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2541       F->factorerrortype = MAT_FACTOR_OTHER;
2542     }
2543   }
2544   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2545 
2546   F->assembled = PETSC_TRUE;
2547 
2548   if (F->schur) { /* reset Schur status to unfactored */
2549 #if defined(PETSC_HAVE_CUDA)
2550     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2551 #endif
2552     PetscScalar *array;
2553     PetscCall(MatDenseGetArray(F->schur, &array));
2554     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2555     PetscCall(MatDenseRestoreArray(F->schur, &array));
2556     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2557       mumps->id.ICNTL(19) = 2;
2558       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2559     }
2560     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2561   }
2562 
2563   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2564   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2565 
2566   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2567   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2568   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2569   // which the solution (in SOL_loc) is available on the local processor.
2570   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2571   // normally have the same contents for each of these calls. The only exception is the case of
2572   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2573   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2574   if (mumps->petsc_size > 1) {
2575     PetscInt     lsol_loc;
2576     PetscScalar *array;
2577 
2578     /* distributed solution; Create x_seq=sol_loc for repeated use */
2579     if (mumps->x_seq) {
2580       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2581       PetscCall(PetscFree(mumps->id.isol_loc));
2582       PetscCall(VecDestroy(&mumps->x_seq));
2583     }
2584     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2585     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2586     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2587     PetscCall(VecGetArray(mumps->x_seq, &array));
2588     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2589     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2590     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2591   }
2592   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2593   PetscFunctionReturn(PETSC_SUCCESS);
2594 }
2595 
2596 /* Sets MUMPS options from the options database */
2597 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2598 {
2599   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2600   PetscReal     cntl;
2601   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2602   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2603   PetscBool     flg   = PETSC_FALSE;
2604   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2605   void         *arr;
2606 
2607   PetscFunctionBegin;
2608   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2609   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2610     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2611     PetscInt       nthreads   = 0;
2612     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2613     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2614     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2615 
2616     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2617     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2618     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2619 
2620     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2621     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2622     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2623     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2624     if (mumps->use_petsc_omp_support) {
2625       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2626 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2627       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2628       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2629 #else
2630       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2631               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2632 #endif
2633     } else {
2634       mumps->omp_comm      = PETSC_COMM_SELF;
2635       mumps->mumps_comm    = mumps->petsc_comm;
2636       mumps->is_omp_master = PETSC_TRUE;
2637     }
2638     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2639     mumps->reqs = NULL;
2640     mumps->tag  = 0;
2641 
2642     if (mumps->mumps_comm != MPI_COMM_NULL) {
2643       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2644         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2645         MPI_Comm comm;
2646         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2647         mumps->mumps_comm = comm;
2648       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2649     }
2650 
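         /* the MUMPS C interface expects a Fortran communicator handle, hence the MPI_Comm_c2f() conversion */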
2651     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2652     mumps->id.job          = JOB_INIT;
2653     mumps->id.par          = 1; /* host participates in factorization and solve */
2654     mumps->id.sym          = mumps->sym;
2655 
2656     size          = mumps->id.size_schur;
2657     arr           = mumps->id.schur;
2658     listvar_schur = mumps->id.listvar_schur;
2659     nblk          = mumps->id.nblk;
2660     blkvar        = mumps->id.blkvar;
2661     blkptr        = mumps->id.blkptr;
2662     if (PetscDefined(USE_DEBUG)) {
2663       for (PetscInt i = 0; i < size; i++)
2664         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2665                    A->rmap->N);
2666     }
2667 
2668     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2669     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2670     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed because the requested precision differs from that of PetscScalar");
2671     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2672 
2673     PetscMUMPS_c(mumps);
2674     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2675 
2676     /* set PETSc-MUMPS default options - override MUMPS default */
2677     mumps->id.ICNTL(3) = 0;
2678     mumps->id.ICNTL(4) = 0;
2679     if (mumps->petsc_size == 1) {
2680       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2681       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2682     } else {
2683       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2684       mumps->id.ICNTL(21) = 1; /* distributed solution */
2685     }
2686     if (nblk && blkptr) {
2687       mumps->id.ICNTL(15) = 1;
2688       mumps->id.nblk      = nblk;
2689       mumps->id.blkvar    = blkvar;
2690       mumps->id.blkptr    = blkptr;
2691     } else mumps->id.ICNTL(15) = 0;
2692 
2693     /* restore cached ICNTL and CNTL values */
2694     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2695     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2696 
2697     PetscCall(PetscFree(mumps->ICNTL_pre));
2698     PetscCall(PetscFree(mumps->CNTL_pre));
2699 
2700     if (schur) {
2701       mumps->id.size_schur    = size;
2702       mumps->id.schur_lld     = size;
2703       mumps->id.schur         = arr;
2704       mumps->id.listvar_schur = listvar_schur;
2705       if (mumps->petsc_size > 1) {
2706         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2707 
2708         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2709         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2710         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2711         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported by PETSc");
2712       } else {
2713         if (F->factortype == MAT_FACTOR_LU) {
2714           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2715         } else {
2716           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2717         }
2718       }
2719       mumps->id.ICNTL(26) = -1;
2720     }
2721 
2722     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2723        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2724      */
2725     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2726     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2727 
2728     mumps->scat_rhs = NULL;
2729     mumps->scat_sol = NULL;
2730   }
2731   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2732   if (flg) mumps->id.ICNTL(1) = icntl;
2733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2734   if (flg) mumps->id.ICNTL(2) = icntl;
2735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2736   if (flg) mumps->id.ICNTL(3) = icntl;
2737 
2738   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2739   if (flg) mumps->id.ICNTL(4) = icntl;
2740   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
2741 
2742   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2743   if (flg) mumps->id.ICNTL(6) = icntl;
2744 
2745   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2746   if (flg) {
2747     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2748     mumps->id.ICNTL(7) = icntl;
2749   }
2750 
2751   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2752   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2758   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
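       /* MUMPS interprets a negative ICNTL(15) as a uniform block size of -ICNTL(15); seed it from the PETSc block
          size when row and column block sizes match (the option below, and the multiple-of-blocksize check, may still override it) */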
2759   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2760   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2761   if (flg) {
2762     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2763     else if (mumps->id.ICNTL(15) > 0) {
2764       const PetscInt *bsizes;
2765       PetscInt        nblocks, p, *blkptr = NULL;
2766       PetscMPIInt    *recvcounts, *displs, n;
2767       PetscMPIInt     rank, size = 0;
2768 
2769       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2770       flg = PETSC_TRUE;
2771       for (p = 0; p < nblocks; ++p) {
2772         if (bsizes[p] > 1) break;
2773       }
2774       if (p == nblocks) flg = PETSC_FALSE;
2775       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2776       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2777         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2778         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2779         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2780         PetscCall(PetscMPIIntCast(nblocks, &n));
2781         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2782         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2783         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2784         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2785         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
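             /* on the root, turn the gathered block sizes into a 1-based block pointer array: after the prefix sum
                below, block p spans variables blkptr[p] .. blkptr[p+1]-1, which is then passed to MatMumpsSetBlk() */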
2786         if (rank == 0) {
2787           blkptr[0] = 1;
2788           for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p];
2789           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2790         }
2791         PetscCall(PetscFree2(recvcounts, displs));
2792         PetscCall(PetscFree(blkptr));
2793       }
2794     }
2795   }
2796   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2797   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2798     PetscCall(MatDestroy(&F->schur));
2799     PetscCall(MatMumpsResetSchur_Private(mumps));
2800   }
2801 
2802   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2803      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2804      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2805      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2806      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2807      In short, we could not use distributed RHS with MPICH until v4.0b1, or until we enabled a workaround available in MUMPS 5.6.2+.
2808    */
2809   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2810 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2811   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2812 #endif
2813   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2814   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2815 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2816   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2817 #endif
2818   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2819 
2820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2821   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2823   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
2824 
2825   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2826   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2828   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2830   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2831   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2832   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2833   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2834   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2835   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2837   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2838   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2840   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2841   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2842 
2843   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2844   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2845   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2846   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2847   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2848   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2849   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2850   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2851   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2852   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2853   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2854   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2855 
2856   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2857 
2858   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2859   if (ninfo) {
2860     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2861     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2862     mumps->ninfo = ninfo;
2863     for (i = 0; i < ninfo; i++) {
2864       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2865       mumps->info[i] = info[i];
2866     }
2867   }
2868   PetscOptionsEnd();
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2873 {
2874   PetscFunctionBegin;
2875   if (mumps->id.INFOG(1) < 0) {
2876     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2877     if (mumps->id.INFOG(1) == -6) {
2878       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2879       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2880     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2881       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2882       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2883     } else {
2884       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885       F->factorerrortype = MAT_FACTOR_OTHER;
2886     }
2887   }
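       /* MUMPS may flag an error for a zero-sized matrix; factoring an empty matrix is trivially successful, so clear the error type */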
2888   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2893 {
2894   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2895   Vec            b;
2896   const PetscInt M = A->rmap->N;
2897 
2898   PetscFunctionBegin;
2899   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2900     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2901     PetscFunctionReturn(PETSC_SUCCESS);
2902   }
2903 
2904   /* Set MUMPS options from the options database */
2905   PetscCall(MatSetFromOptions_MUMPS(F, A));
2906 
2907   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2908   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2909 
2910   /* analysis phase */
2911   mumps->id.job = JOB_FACTSYMBOLIC;
2912   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2913   switch (mumps->id.ICNTL(18)) {
2914   case 0: /* centralized assembled matrix input */
2915     if (!mumps->myid) {
2916       mumps->id.nnz = mumps->nnz;
2917       mumps->id.irn = mumps->irn;
2918       mumps->id.jcn = mumps->jcn;
2919       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2920       if (r && mumps->id.ICNTL(7) == 7) {
2921         mumps->id.ICNTL(7) = 1;
2922         if (!mumps->myid) {
2923           const PetscInt *idx;
2924           PetscInt        i;
2925 
2926           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2927           PetscCall(ISGetIndices(r, &idx));
2928           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2929           PetscCall(ISRestoreIndices(r, &idx));
2930         }
2931       }
2932     }
2933     break;
2934   case 3: /* distributed assembled matrix input (size>1) */
2935     mumps->id.nnz_loc = mumps->nnz;
2936     mumps->id.irn_loc = mumps->irn;
2937     mumps->id.jcn_loc = mumps->jcn;
2938     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2939     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2940       PetscCall(MatCreateVecs(A, NULL, &b));
2941       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2942       PetscCall(VecDestroy(&b));
2943     }
2944     break;
2945   }
2946   PetscMUMPS_c(mumps);
2947   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2948 
2949   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2950   F->ops->solve             = MatSolve_MUMPS;
2951   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2952   F->ops->matsolve          = MatMatSolve_MUMPS;
2953   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2954   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2955 
2956   mumps->matstruc = SAME_NONZERO_PATTERN;
2957   PetscFunctionReturn(PETSC_SUCCESS);
2958 }
2959 
2960 /* Note the PETSc r and c permutations are ignored */
2961 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2962 {
2963   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2964   Vec            b;
2965   const PetscInt M = A->rmap->N;
2966 
2967   PetscFunctionBegin;
2968   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2969     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2970     PetscFunctionReturn(PETSC_SUCCESS);
2971   }
2972 
2973   /* Set MUMPS options from the options database */
2974   PetscCall(MatSetFromOptions_MUMPS(F, A));
2975 
2976   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2977   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2978 
2979   /* analysis phase */
2980   mumps->id.job = JOB_FACTSYMBOLIC;
2981   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2982   switch (mumps->id.ICNTL(18)) {
2983   case 0: /* centralized assembled matrix input */
2984     if (!mumps->myid) {
2985       mumps->id.nnz = mumps->nnz;
2986       mumps->id.irn = mumps->irn;
2987       mumps->id.jcn = mumps->jcn;
2988       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2989     }
2990     break;
2991   case 3: /* distributed assembled matrix input (size>1) */
2992     mumps->id.nnz_loc = mumps->nnz;
2993     mumps->id.irn_loc = mumps->irn;
2994     mumps->id.jcn_loc = mumps->jcn;
2995     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2996     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2997       PetscCall(MatCreateVecs(A, NULL, &b));
2998       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2999       PetscCall(VecDestroy(&b));
3000     }
3001     break;
3002   }
3003   PetscMUMPS_c(mumps);
3004   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3005 
3006   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3007   F->ops->solve             = MatSolve_MUMPS;
3008   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3009   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3010 
3011   mumps->matstruc = SAME_NONZERO_PATTERN;
3012   PetscFunctionReturn(PETSC_SUCCESS);
3013 }
3014 
3015 /* Note the PETSc r permutation and factor info are ignored */
3016 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3017 {
3018   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3019   Vec            b;
3020   const PetscInt M = A->rmap->N;
3021 
3022   PetscFunctionBegin;
3023   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3024     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3025     PetscFunctionReturn(PETSC_SUCCESS);
3026   }
3027 
3028   /* Set MUMPS options from the options database */
3029   PetscCall(MatSetFromOptions_MUMPS(F, A));
3030 
3031   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3032   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3033 
3034   /* analysis phase */
3035   mumps->id.job = JOB_FACTSYMBOLIC;
3036   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3037   switch (mumps->id.ICNTL(18)) {
3038   case 0: /* centralized assembled matrix input */
3039     if (!mumps->myid) {
3040       mumps->id.nnz = mumps->nnz;
3041       mumps->id.irn = mumps->irn;
3042       mumps->id.jcn = mumps->jcn;
3043       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3044     }
3045     break;
3046   case 3: /* distributed assembled matrix input (size>1) */
3047     mumps->id.nnz_loc = mumps->nnz;
3048     mumps->id.irn_loc = mumps->irn;
3049     mumps->id.jcn_loc = mumps->jcn;
3050     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3051     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3052       PetscCall(MatCreateVecs(A, NULL, &b));
3053       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3054       PetscCall(VecDestroy(&b));
3055     }
3056     break;
3057   }
3058   PetscMUMPS_c(mumps);
3059   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3060 
3061   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3062   F->ops->solve                 = MatSolve_MUMPS;
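       /* the Cholesky factorization is symmetric, so the transpose solve can simply reuse MatSolve_MUMPS() */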
3063   F->ops->solvetranspose        = MatSolve_MUMPS;
3064   F->ops->matsolve              = MatMatSolve_MUMPS;
3065   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3066   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3067 #if defined(PETSC_USE_COMPLEX)
3068   F->ops->getinertia = NULL;
3069 #else
3070   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3071 #endif
3072 
3073   mumps->matstruc = SAME_NONZERO_PATTERN;
3074   PetscFunctionReturn(PETSC_SUCCESS);
3075 }
3076 
3077 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3078 {
3079   PetscBool         isascii;
3080   PetscViewerFormat format;
3081   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
3082 
3083   PetscFunctionBegin;
3084   /* check if matrix is mumps type */
3085   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3086 
3087   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3088   if (isascii) {
3089     PetscCall(PetscViewerGetFormat(viewer, &format));
3090     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3091       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3092       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3093         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3094         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3095         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3096         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3097         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3098         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3099         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3100         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3101         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3102         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3103         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3104         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3105         if (mumps->id.ICNTL(11) > 0) {
3106           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3107           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3108           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3109           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3110           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3111           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3112         }
3113         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3114         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3115         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3116         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3117         /* ICNTL(16), ICNTL(17) not used */
3118         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3119         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3120         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3121         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3122         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3123         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3124 
3125         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3126         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3127         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3128         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3129         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3130         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
3131 
3132         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3133         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
3134         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3135         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3136         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3137         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3138         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3139         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3140         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3141         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3142         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
3143 
3144         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3145         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3146         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3147         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3148         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3149         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3150 
3151         /* information local to each processor */
3152         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3153         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3154         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3155         PetscCall(PetscViewerFlush(viewer));
3156         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3157         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3158         PetscCall(PetscViewerFlush(viewer));
3159         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3160         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3161         PetscCall(PetscViewerFlush(viewer));
3162 
3163         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
3164         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3165         PetscCall(PetscViewerFlush(viewer));
3166 
3167         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
3168         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3169         PetscCall(PetscViewerFlush(viewer));
3170 
3171         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3172         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3173         PetscCall(PetscViewerFlush(viewer));
3174 
3175         if (mumps->ninfo && mumps->ninfo <= 80) {
3176           PetscInt i;
3177           for (i = 0; i < mumps->ninfo; i++) {
3178             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3179             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3180             PetscCall(PetscViewerFlush(viewer));
3181           }
3182         }
3183         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3184       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3185 
3186       if (mumps->myid == 0) { /* information from the host */
3187         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3188         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3189         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3190         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3191 
3192         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3193         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3194         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3195         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3196         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3197         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3198         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3199         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3200         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3201         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3202         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3203         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3204         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3205         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3206         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3207         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3208         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3209         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3210         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3211         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3212         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3213         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3214         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3215         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3216         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3217         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3218         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3219         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3220         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3221         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3222         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3223         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3224         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3225         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3226       }
3227     }
3228   }
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3233 {
3234   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3235 
3236   PetscFunctionBegin;
3237   info->block_size        = 1.0;
3238   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); /* a negative INFOG(20) reports the count in millions of entries */
3239   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3240   info->nz_unneeded       = 0.0;
3241   info->assemblies        = 0.0;
3242   info->mallocs           = 0.0;
3243   info->memory            = 0.0;
3244   info->fill_ratio_given  = 0;
3245   info->fill_ratio_needed = 0;
3246   info->factor_mallocs    = 0;
3247   PetscFunctionReturn(PETSC_SUCCESS);
3248 }
3249 
3250 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3251 {
3252   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3253   const PetscScalar *arr;
3254   const PetscInt    *idxs;
3255   PetscInt           size, i;
3256 
3257   PetscFunctionBegin;
3258   PetscCall(ISGetLocalSize(is, &size));
3259   /* Schur complement matrix */
3260   PetscCall(MatDestroy(&F->schur));
3261   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3262   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3263   // don't allocate mumps->id.schur[] now as its precision is not yet known
3264   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3265   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3266   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3267   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3268 
3269   /* MUMPS expects Fortran style indices */
3270   PetscCall(PetscFree(mumps->id.listvar_schur));
3271   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3272   PetscCall(ISGetIndices(is, &idxs));
3273   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3274   PetscCall(ISRestoreIndices(is, &idxs));
3275   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3276   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3277   else mumps->ICNTL26 = -1;
3278   PetscFunctionReturn(PETSC_SUCCESS);
3279 }
3280 
3281 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3282 {
3283   Mat          St;
3284   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3285   PetscScalar *array;
3286   PetscInt     i, j, N = mumps->id.size_schur;
3287 
3288   PetscFunctionBegin;
3289   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3290   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3291   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3292   PetscCall(MatSetType(St, MATDENSE));
3293   PetscCall(MatSetUp(St));
3294   PetscCall(MatDenseGetArray(St, &array));
3295   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3296     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3297       for (i = 0; i < N; i++) {
3298         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3299       }
3300     } else { /* stored by columns */
3301       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3302     }
3303   } else {                          /* either full or lower-triangular (not packed) */
3304     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3305       for (i = 0; i < N; i++) {
3306         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3307       }
3308     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3309       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3310     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3311       for (i = 0; i < N; i++) {
3312         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3313       }
3314     }
3315   }
3316   PetscCall(MatDenseRestoreArray(St, &array));
3317   *S = St;
3318   PetscFunctionReturn(PETSC_SUCCESS);
3319 }
3320 
3321 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3322 {
3323   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3324 
3325   PetscFunctionBegin;
3326   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3327     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3328     for (i = 0; i < nICNTL_pre; ++i)
3329       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3330     if (i == nICNTL_pre) {                             /* not already cached */
3331       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3332       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3333       mumps->ICNTL_pre[0]++;
3334     }
3335     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3336     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3337   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3338   PetscFunctionReturn(PETSC_SUCCESS);
3339 }
3340 
3341 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3342 {
3343   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3344 
3345   PetscFunctionBegin;
3346   if (mumps->id.job == JOB_NULL) {
3347     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3348     *ival = 0;
3349     for (i = 0; i < nICNTL_pre; ++i) {
3350       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3351     }
3352   } else *ival = mumps->id.ICNTL(icntl);
3353   PetscFunctionReturn(PETSC_SUCCESS);
3354 }
3355 
3356 /*@
3357   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3358 
3359   Logically Collective
3360 
3361   Input Parameters:
3362 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3363 . icntl - index of MUMPS parameter array `ICNTL()`
3364 - ival  - value of MUMPS `ICNTL(icntl)`
3365 
3366   Options Database Key:
3367 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3368 
3369   Level: beginner
3370 
3371   Note:
3372   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
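
  Example Usage:
  A minimal sketch of the usual `KSP`/`PC` path (assuming `ksp` is a `KSP` whose operators are already set); `PCFactorSetUpMatSolverType()` creates the factor matrix so the option can be set before the factorization is performed
.vb
  PC  pc;
  Mat F;

  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCLU));
  PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
  PetscCall(PCFactorSetUpMatSolverType(pc)); // create the MUMPS factor matrix so it can be customized
  PetscCall(PCFactorGetMatrix(pc, &F));
  PetscCall(MatMumpsSetIcntl(F, 14, 30));    // hypothetical choice: raise the working space estimate by 30 percent
.ve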
3373 
3374 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3375 @*/
3376 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3377 {
3378   PetscFunctionBegin;
3379   PetscValidType(F, 1);
3380   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3381   PetscValidLogicalCollectiveInt(F, icntl, 2);
3382   PetscValidLogicalCollectiveInt(F, ival, 3);
3383   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3384   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3385   PetscFunctionReturn(PETSC_SUCCESS);
3386 }
3387 
3388 /*@
3389   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3390 
3391   Logically Collective
3392 
3393   Input Parameters:
3394 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3395 - icntl - index of MUMPS parameter array ICNTL()
3396 
3397   Output Parameter:
3398 . ival - value of MUMPS ICNTL(icntl)
3399 
3400   Level: beginner
3401 
3402 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3403 @*/
3404 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3405 {
3406   PetscFunctionBegin;
3407   PetscValidType(F, 1);
3408   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3409   PetscValidLogicalCollectiveInt(F, icntl, 2);
3410   PetscAssertPointer(ival, 3);
3411   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3412   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3413   PetscFunctionReturn(PETSC_SUCCESS);
3414 }
3415 
3416 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3417 {
3418   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3419 
3420   PetscFunctionBegin;
3421   if (mumps->id.job == JOB_NULL) {
3422     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3423     for (i = 0; i < nCNTL_pre; ++i)
3424       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3425     if (i == nCNTL_pre) {
3426       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3427       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3428       mumps->CNTL_pre[0]++;
3429     }
3430     mumps->CNTL_pre[1 + 2 * i] = icntl;
3431     mumps->CNTL_pre[2 + 2 * i] = val;
3432   } else ID_CNTL_SET(mumps->id, icntl, val);
3433   PetscFunctionReturn(PETSC_SUCCESS);
3434 }
3435 
3436 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3437 {
3438   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3439 
3440   PetscFunctionBegin;
3441   if (mumps->id.job == JOB_NULL) {
3442     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3443     *val = 0.0;
3444     for (i = 0; i < nCNTL_pre; ++i) {
3445       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3446     }
3447   } else *val = ID_CNTL_GET(mumps->id, icntl);
3448   PetscFunctionReturn(PETSC_SUCCESS);
3449 }
3450 
3451 /*@
3452   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3453 
3454   Logically Collective
3455 
3456   Input Parameters:
3457 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3458 . icntl - index of MUMPS parameter array `CNTL()`
3459 - val   - value of MUMPS `CNTL(icntl)`
3460 
3461   Options Database Key:
3462 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
3463 
3464   Level: beginner
3465 
3466   Note:
3467   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
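
  Example Usage:
  A minimal sketch (assuming `F` was obtained as in the `MatMumpsSetIcntl()` example); equivalent to passing -mat_mumps_cntl_1 0.01 on the command line
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.01)); // hypothetical choice of relative pivoting threshold
.ve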
3468 
3469 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3470 @*/
3471 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3472 {
3473   PetscFunctionBegin;
3474   PetscValidType(F, 1);
3475   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3476   PetscValidLogicalCollectiveInt(F, icntl, 2);
3477   PetscValidLogicalCollectiveReal(F, val, 3);
3478   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3479   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3480   PetscFunctionReturn(PETSC_SUCCESS);
3481 }
3482 
3483 /*@
3484   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3485 
3486   Logically Collective
3487 
3488   Input Parameters:
3489 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3490 - icntl - index of MUMPS parameter array CNTL()
3491 
3492   Output Parameter:
3493 . val - value of MUMPS CNTL(icntl)
3494 
3495   Level: beginner
3496 
3497 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3498 @*/
3499 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3500 {
3501   PetscFunctionBegin;
3502   PetscValidType(F, 1);
3503   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3504   PetscValidLogicalCollectiveInt(F, icntl, 2);
3505   PetscAssertPointer(val, 3);
3506   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3507   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3508   PetscFunctionReturn(PETSC_SUCCESS);
3509 }
3510 
3511 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3512 {
3513   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3514 
3515   PetscFunctionBegin;
3516   *info = mumps->id.INFO(icntl);
3517   PetscFunctionReturn(PETSC_SUCCESS);
3518 }
3519 
3520 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3521 {
3522   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3523 
3524   PetscFunctionBegin;
3525   *infog = mumps->id.INFOG(icntl);
3526   PetscFunctionReturn(PETSC_SUCCESS);
3527 }
3528 
3529 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3530 {
3531   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3532 
3533   PetscFunctionBegin;
3534   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3535   PetscFunctionReturn(PETSC_SUCCESS);
3536 }
3537 
3538 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3539 {
3540   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3541 
3542   PetscFunctionBegin;
3543   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3544   PetscFunctionReturn(PETSC_SUCCESS);
3545 }
3546 
3547 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3548 {
3549   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3550 
3551   PetscFunctionBegin;
3552   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3553   *size  = 0;
3554   *array = NULL;
3555   if (!mumps->myid) {
3556     *size = mumps->id.INFOG(28);
3557     PetscCall(PetscMalloc1(*size, array));
3558     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3559   }
3560   PetscFunctionReturn(PETSC_SUCCESS);
3561 }
3562 
3563 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3564 {
3565   Mat          Bt = NULL, Btseq = NULL;
3566   PetscBool    flg;
3567   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3568   PetscScalar *aa;
3569   PetscInt     spnr, *ia, *ja, M, nrhs;
3570 
3571   PetscFunctionBegin;
3572   PetscAssertPointer(spRHS, 2);
3573   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3574   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3575   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3576   PetscCall(MatTransposeGetMat(spRHS, &Bt));
3577 
3578   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3579 
3580   if (mumps->petsc_size > 1) {
3581     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3582     Btseq         = b->A;
3583   } else {
3584     Btseq = Bt;
3585   }
3586 
3587   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3588   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3589   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3590   mumps->id.rhs = NULL;
3591 
3592   if (!mumps->myid) {
3593     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3594     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3595     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3596     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3597     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3598   } else {
3599     mumps->id.irhs_ptr    = NULL;
3600     mumps->id.irhs_sparse = NULL;
3601     mumps->id.nz_rhs      = 0;
3602     if (mumps->id.rhs_sparse_len) {
3603       PetscCall(PetscFree(mumps->id.rhs_sparse));
3604       mumps->id.rhs_sparse_len = 0;
3605     }
3606   }
3607   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3608   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3609 
3610   /* solve phase */
3611   mumps->id.job = JOB_SOLVE;
3612   PetscMUMPS_c(mumps);
3613   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3614 
3615   if (!mumps->myid) {
3616     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3617     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3618     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3619   }
3620   PetscFunctionReturn(PETSC_SUCCESS);
3621 }
3622 
3623 /*@
3624   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3625 
3626   Logically Collective
3627 
3628   Input Parameter:
3629 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3630 
3631   Output Parameter:
3632 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with the requested entries of the inverse of `A` (see the sketch below)
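
  Example Usage:
  A minimal sketch (assuming a single MPI rank and that `F` already holds a completed MUMPS factorization of `A`); the requested pattern, here the diagonal of the inverse, is encoded by the nonzeros of a sparse matrix that is wrapped with `MatCreateTranspose()` and overwritten with the result
.vb
  Mat      spRHST, spRHS;
  PetscInt n;

  PetscCall(MatGetSize(A, &n, NULL));
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n, n, 1, NULL, &spRHST));
  for (PetscInt i = 0; i < n; i++) PetscCall(MatSetValue(spRHST, i, i, 1.0, INSERT_VALUES)); // request the diagonal entries of inv(A)
  PetscCall(MatAssemblyBegin(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(spRHST, MAT_FINAL_ASSEMBLY));
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS)); // on return spRHST holds the requested entries
  PetscCall(MatDestroy(&spRHS));
  PetscCall(MatDestroy(&spRHST));
.ve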
3633 
3634   Level: beginner
3635 
3636 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3637 @*/
3638 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3639 {
3640   PetscFunctionBegin;
3641   PetscValidType(F, 1);
3642   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3643   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3644   PetscFunctionReturn(PETSC_SUCCESS);
3645 }
3646 
3647 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3648 {
3649   Mat spRHS;
3650 
3651   PetscFunctionBegin;
3652   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3653   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3654   PetscCall(MatDestroy(&spRHS));
3655   PetscFunctionReturn(PETSC_SUCCESS);
3656 }
3657 
3658 /*@
3659   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3660 
3661   Logically Collective
3662 
3663   Input Parameter:
3664 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3665 
3666   Output Parameter:
3667 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3668 
3669   Level: beginner
3670 
3671 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3672 @*/
3673 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3674 {
3675   PetscBool flg;
3676 
3677   PetscFunctionBegin;
3678   PetscValidType(F, 1);
3679   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3680   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3681   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3682   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3683   PetscFunctionReturn(PETSC_SUCCESS);
3684 }
3685 
3686 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3687 {
3688   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3689 
3690   PetscFunctionBegin;
3691   if (nblk) {
3692     PetscAssertPointer(blkptr, 4);
3693     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3694     PetscCall(PetscFree(mumps->id.blkptr));
3695     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3696     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3697     // mumps->id.icntl[] might not have been allocated yet (allocation happens in MatSetFromOptions_MUMPS()), so we don't assign ICNTL(15) here.
3698     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3699     // mumps->id.ICNTL(15) = 1;
3700     if (blkvar) {
3701       PetscCall(PetscFree(mumps->id.blkvar));
3702       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3703       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3704     }
3705   } else {
3706     PetscCall(PetscFree(mumps->id.blkptr));
3707     PetscCall(PetscFree(mumps->id.blkvar));
3708     // mumps->id.ICNTL(15) = 0;
3709     mumps->id.nblk = 0;
3710   }
3711   PetscFunctionReturn(PETSC_SUCCESS);
3712 }
3713 
3714 /*@
3715   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3716 
3717   Not collective, only relevant on the first process of the MPI communicator
3718 
3719   Input Parameters:
3720 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3721 . nblk   - the number of blocks
3722 . blkvar - see the MUMPS documentation; `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk=1, nblk`) holds the variables associated with block `iblk` (see the sketch below)
3723 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
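
  Example Usage:
  A minimal sketch with hypothetical values for a 5-by-5 matrix: two blocks, the first holding variables 1 and 2 and the second holding variables 3, 4 and 5, described with 1-based (Fortran style) indices
.vb
  PetscInt blkvar[] = {1, 2, 3, 4, 5}; // variables listed block by block
  PetscInt blkptr[] = {1, 3, 6};       // block iblk holds blkvar(blkptr(iblk) : blkptr(iblk+1)-1)

  PetscCall(MatMumpsSetBlk(F, 2, blkvar, blkptr));
.ve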
3724 
3725   Level: advanced
3726 
3727 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3728 @*/
3729 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3730 {
3731   PetscFunctionBegin;
3732   PetscValidType(F, 1);
3733   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3734   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3735   PetscFunctionReturn(PETSC_SUCCESS);
3736 }
3737 
3738 /*@
3739   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3740 
3741   Logically Collective
3742 
3743   Input Parameters:
3744 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3745 - icntl - index of MUMPS parameter array INFO()
3746 
3747   Output Parameter:
3748 . ival - value of MUMPS INFO(icntl)
3749 
3750   Level: beginner
3751 
3752 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3753 @*/
3754 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3755 {
3756   PetscFunctionBegin;
3757   PetscValidType(F, 1);
3758   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3759   PetscAssertPointer(ival, 3);
3760   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3761   PetscFunctionReturn(PETSC_SUCCESS);
3762 }
3763 
3764 /*@
3765   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3766 
3767   Logically Collective
3768 
3769   Input Parameters:
3770 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3771 - icntl - index of MUMPS parameter array INFOG()
3772 
3773   Output Parameter:
3774 . ival - value of MUMPS INFOG(icntl)
3775 
3776   Level: beginner
3777 
3778 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3779 @*/
3780 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3781 {
3782   PetscFunctionBegin;
3783   PetscValidType(F, 1);
3784   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3785   PetscAssertPointer(ival, 3);
3786   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3787   PetscFunctionReturn(PETSC_SUCCESS);
3788 }
3789 
3790 /*@
3791   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3792 
3793   Logically Collective
3794 
3795   Input Parameters:
3796 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3797 - icntl - index of MUMPS parameter array RINFO()
3798 
3799   Output Parameter:
3800 . val - value of MUMPS RINFO(icntl)
3801 
3802   Level: beginner
3803 
3804 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3805 @*/
3806 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3807 {
3808   PetscFunctionBegin;
3809   PetscValidType(F, 1);
3810   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3811   PetscAssertPointer(val, 3);
3812   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3813   PetscFunctionReturn(PETSC_SUCCESS);
3814 }
3815 
3816 /*@
3817   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3818 
3819   Logically Collective
3820 
3821   Input Parameters:
3822 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3823 - icntl - index of MUMPS parameter array RINFOG()
3824 
3825   Output Parameter:
3826 . val - value of MUMPS RINFOG(icntl)
3827 
3828   Level: beginner
3829 
3830 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3831 @*/
3832 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3833 {
3834   PetscFunctionBegin;
3835   PetscValidType(F, 1);
3836   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3837   PetscAssertPointer(val, 3);
3838   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3839   PetscFunctionReturn(PETSC_SUCCESS);
3840 }
3841 
3842 /*@
3843   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3844 
3845   Logically Collective
3846 
3847   Input Parameter:
3848 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3849 
3850   Output Parameters:
3851 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3852 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3853           for freeing this array with `PetscFree()`, as in the sketch below.
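
  Example Usage:
  A minimal sketch (null pivot detection must have been enabled before the numerical factorization, for example with -mat_mumps_icntl_24 1 or `MatMumpsSetIcntl()`); only rank 0 receives a non-empty array
.vb
  PetscInt  nnull, *rows;

  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));
  for (PetscInt i = 0; i < nnull; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "null pivot row %" PetscInt_FMT "\n", rows[i]));
  PetscCall(PetscFree(rows)); // the caller owns the array
.ve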
3854 
3855   Level: beginner
3856 
3857 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3858 @*/
3859 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3860 {
3861   PetscFunctionBegin;
3862   PetscValidType(F, 1);
3863   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3864   PetscAssertPointer(size, 2);
3865   PetscAssertPointer(array, 3);
3866   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3867   PetscFunctionReturn(PETSC_SUCCESS);
3868 }
3869 
3870 /*MC
3871   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3872   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3873 
3874   Works with `MATAIJ` and `MATSBAIJ` matrices
3875 
3876   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3877 
3878   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3879   See details below.
3880 
3881   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
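
  For example, with a `KSP`-based program (a sketch; ./ex2 stands for any PETSc code that solves a linear system with `KSP`) one may run
.vb
  mpirun -n 4 ./ex2 -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2 -ksp_view
.ve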
3882 
3883   Options Database Keys:
3884 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3885 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3886 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3887 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3888 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3889 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3890                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3891 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3892 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3893 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3894 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3895 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3896 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3897 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3898 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3899 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3900 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3901 .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
3902 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3903 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3904 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix is involved
3905 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3906 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3907 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3908 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3909 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3910 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3911 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3912 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3913 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3914 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3915 .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3916 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3917 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3918 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3919 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3920 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3921 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3922 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3923 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3924                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3925 
3926   Level: beginner
3927 
3928   Notes:
3929   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3930   error if the matrix is Hermitian.
3931 
3932   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3933   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3934 
3935   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3936   the failure with
3937 .vb
3938           KSPGetPC(ksp,&pc);
3939           PCFactorGetMatrix(pc,&mat);
3940           MatMumpsGetInfo(mat,....);
3941           MatMumpsGetInfog(mat,....); etc.
3942 .ve
3943   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3944 
3945   MUMPS provides 64-bit integer support in two build modes:
3946   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3947   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3948 
3949   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3950   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3951   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3952   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3953 
3954   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3955 
3956   Two modes to run MUMPS/PETSc with OpenMP
3957 .vb
3958    Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3959    threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3960 .ve
3961 
3962 .vb
3963    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3964    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3965 .ve
3966 
3967    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3968    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3969    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3970    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3971    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3972 
3973    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3974    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3975    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3976    are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We will free CPUs assigned to slaves (might be set
3977    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3978    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3979    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3980    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and the threads it spawns) will use half the cores in socket 0 and half
3981    the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3982    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3983    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3984    examine the mapping result.
3985 
3986    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3987    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3988    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3989 
3990    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3991 
3992 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3993 M*/
3994 
3995 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3996 {
3997   PetscFunctionBegin;
3998   *type = MATSOLVERMUMPS;
3999   PetscFunctionReturn(PETSC_SUCCESS);
4000 }
4001 
4002 /* MatGetFactor for Seq and MPI AIJ matrices */
4003 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
4004 {
4005   Mat         B;
4006   Mat_MUMPS  *mumps;
4007   PetscBool   isSeqAIJ, isDiag, isDense;
4008   PetscMPIInt size;
4009 
4010   PetscFunctionBegin;
4011 #if defined(PETSC_USE_COMPLEX)
4012   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4013     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4014     *F = NULL;
4015     PetscFunctionReturn(PETSC_SUCCESS);
4016   }
4017 #endif
4018   /* Create the factorization matrix */
4019   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4020   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4021   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4022   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4023   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4024   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4025   PetscCall(MatSetUp(B));
4026 
4027   PetscCall(PetscNew(&mumps));
4028 
4029   B->ops->view    = MatView_MUMPS;
4030   B->ops->getinfo = MatGetInfo_MUMPS;
4031 
4032   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4037   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4038   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4039   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4040   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4041   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4042   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4043   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4044   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4045   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4046   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4047 
4048   if (ftype == MAT_FACTOR_LU) {
4049     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4050     B->factortype            = MAT_FACTOR_LU;
4051     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4052     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4053     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4054     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4055     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4056     mumps->sym = 0;
4057   } else {
4058     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4059     B->factortype                  = MAT_FACTOR_CHOLESKY;
4060     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4061     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4062     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4063     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4064     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4065 #if defined(PETSC_USE_COMPLEX)
4066     mumps->sym = 2;
4067 #else
4068     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4069     else mumps->sym = 2;
4070 #endif
4071   }
4072 
4073   /* set solvertype */
4074   PetscCall(PetscFree(B->solvertype));
4075   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4076   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4077   if (size == 1) {
4078     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4079     B->canuseordering = PETSC_TRUE;
4080   }
4081   B->ops->destroy = MatDestroy_MUMPS;
4082   B->data         = (void *)mumps;
4083 
4084   *F               = B;
4085   mumps->id.job    = JOB_NULL;
4086   mumps->ICNTL_pre = NULL;
4087   mumps->CNTL_pre  = NULL;
4088   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4089   PetscFunctionReturn(PETSC_SUCCESS);
4090 }
4091 
4092 /* MatGetFactor for Seq and MPI SBAIJ matrices */
4093 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4094 {
4095   Mat         B;
4096   Mat_MUMPS  *mumps;
4097   PetscBool   isSeqSBAIJ;
4098   PetscMPIInt size;
4099 
4100   PetscFunctionBegin;
4101 #if defined(PETSC_USE_COMPLEX)
4102   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4103     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4104     *F = NULL;
4105     PetscFunctionReturn(PETSC_SUCCESS);
4106   }
4107 #endif
4108   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4109   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4110   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4111   PetscCall(MatSetUp(B));
4112 
4113   PetscCall(PetscNew(&mumps));
4114   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4115   if (isSeqSBAIJ) {
4116     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4117   } else {
4118     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4119   }
4120 
4121   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4122   B->ops->view                   = MatView_MUMPS;
4123   B->ops->getinfo                = MatGetInfo_MUMPS;
4124 
4125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4134   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4135   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4136   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4137   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4138   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4139   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4140 
4141   B->factortype = MAT_FACTOR_CHOLESKY;
4142 #if defined(PETSC_USE_COMPLEX)
4143   mumps->sym = 2;
4144 #else
4145   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4146   else mumps->sym = 2;
4147 #endif
4148 
4149   /* set solvertype */
4150   PetscCall(PetscFree(B->solvertype));
4151   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4152   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4153   if (size == 1) {
4154     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4155     B->canuseordering = PETSC_TRUE;
4156   }
4157   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4158   B->ops->destroy = MatDestroy_MUMPS;
4159   B->data         = (void *)mumps;
4160 
4161   *F               = B;
4162   mumps->id.job    = JOB_NULL;
4163   mumps->ICNTL_pre = NULL;
4164   mumps->CNTL_pre  = NULL;
4165   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4166   PetscFunctionReturn(PETSC_SUCCESS);
4167 }
4168 
4169 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4170 {
4171   Mat         B;
4172   Mat_MUMPS  *mumps;
4173   PetscBool   isSeqBAIJ;
4174   PetscMPIInt size;
4175 
4176   PetscFunctionBegin;
4177   /* Create the factorization matrix */
4178   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4179   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4180   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4181   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4182   PetscCall(MatSetUp(B));
4183 
4184   PetscCall(PetscNew(&mumps));
4185   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4186   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4187   B->factortype            = MAT_FACTOR_LU;
4188   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4189   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4190   mumps->sym = 0;
4191   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4192 
4193   B->ops->view    = MatView_MUMPS;
4194   B->ops->getinfo = MatGetInfo_MUMPS;
4195 
4196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4205   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4206   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4207   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4208   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4209   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4210   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4211 
4212   /* set solvertype */
4213   PetscCall(PetscFree(B->solvertype));
4214   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4215   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4216   if (size == 1) {
4217     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4218     B->canuseordering = PETSC_TRUE;
4219   }
4220   B->ops->destroy = MatDestroy_MUMPS;
4221   B->data         = (void *)mumps;
4222 
4223   *F               = B;
4224   mumps->id.job    = JOB_NULL;
4225   mumps->ICNTL_pre = NULL;
4226   mumps->CNTL_pre  = NULL;
4227   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4228   PetscFunctionReturn(PETSC_SUCCESS);
4229 }
4230 
4231 /* MatGetFactor for SELL matrices; currently only sequential SELL (MATSEQSELL) with LU is supported */
4232 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4233 {
4234   Mat         B;
4235   Mat_MUMPS  *mumps;
4236   PetscBool   isSeqSELL;
4237   PetscMPIInt size;
4238 
4239   PetscFunctionBegin;
4240   /* Create the factorization matrix */
4241   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4242   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4243   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4244   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4245   PetscCall(MatSetUp(B));
4246 
4247   PetscCall(PetscNew(&mumps));
4248 
4249   B->ops->view    = MatView_MUMPS;
4250   B->ops->getinfo = MatGetInfo_MUMPS;
4251 
4252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4255   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4257   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4258   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4261   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4262   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4263   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4264 
4265   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS Cholesky factorization of SELL matrices is not yet implemented; use MAT_FACTOR_LU");
4266   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4267   B->factortype            = MAT_FACTOR_LU;
4268   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only sequential SELL (MATSEQSELL) matrices are supported; MPI SELL is not yet implemented");
4269   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4270   mumps->sym              = 0;
4271   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4272 
4273   /* set solvertype */
4274   PetscCall(PetscFree(B->solvertype));
4275   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4276   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4277   if (size == 1) {
4278     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4279     B->canuseordering = PETSC_TRUE;
4280   }
4281   B->ops->destroy = MatDestroy_MUMPS;
4282   B->data         = (void *)mumps;
4283 
4284   *F               = B;
4285   mumps->id.job    = JOB_NULL;
4286   mumps->ICNTL_pre = NULL;
4287   mumps->CNTL_pre  = NULL;
4288   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4289   PetscFunctionReturn(PETSC_SUCCESS);
4290 }
4291 
4292 /* MatGetFactor for MATNEST matrices */
4293 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4294 {
4295   Mat         B, **mats;
4296   Mat_MUMPS  *mumps;
4297   PetscInt    nr, nc;
4298   PetscMPIInt size;
4299   PetscBool   flg = PETSC_TRUE;
4300 
4301   PetscFunctionBegin;
4302 #if defined(PETSC_USE_COMPLEX)
4303   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4304     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4305     *F = NULL;
4306     PetscFunctionReturn(PETSC_SUCCESS);
4307   }
4308 #endif
4309 
4310   /* Return a NULL factor if the nest structure does not satisfy the requirements checked below */
4311   *F = NULL;
4312   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4313   if (ftype == MAT_FACTOR_CHOLESKY) {
4314     IS       *rows, *cols;
4315     PetscInt *m, *M;
4316 
4317     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4318     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4319     PetscCall(MatNestGetISs(A, rows, cols));
4320     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4321     if (!flg) {
4322       PetscCall(PetscFree2(rows, cols));
4323       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4324       PetscFunctionReturn(PETSC_SUCCESS);
4325     }
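    /* Additionally require the (min, max) index ranges of the row ISs of distinct nest blocks not to
       intersect; otherwise return a NULL factor so the caller falls back (e.g. to MAT_FACTOR_LU) */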
4326     PetscCall(PetscMalloc2(nr, &m, nr, &M));
4327     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
4328     for (PetscInt r = 0; flg && r < nr; r++)
4329       for (PetscInt k = r + 1; flg && k < nr; k++)
4330         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4331     PetscCall(PetscFree2(m, M));
4332     PetscCall(PetscFree2(rows, cols));
4333     if (!flg) {
4334       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4335       PetscFunctionReturn(PETSC_SUCCESS);
4336     }
4337   }
4338 
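  /* Check that every nonempty block has a type supported by the nest-to-triples conversion
     (AIJ, BAIJ, diagonal, dense anywhere; SBAIJ only on the diagonal and only for Cholesky) */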
4339   for (PetscInt r = 0; r < nr; r++) {
4340     for (PetscInt c = 0; c < nc; c++) {
4341       Mat       sub = mats[r][c];
4342       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
4343 
4344       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
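      /* Look at the underlying matrix when the block is a virtual (Hermitian) transpose wrapper */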
4345       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4346       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4347       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4348       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4349       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4350       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4351       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4352       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4353       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4354       if (ftype == MAT_FACTOR_CHOLESKY) {
4355         if (r == c) {
4356           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4357             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4358             flg = PETSC_FALSE;
4359           }
4360         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4361           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4362           flg = PETSC_FALSE;
4363         }
4364       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4365         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4366         flg = PETSC_FALSE;
4367       }
4368     }
4369   }
4370   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
4371 
4372   /* Create the factorization matrix */
4373   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4374   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4375   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4376   PetscCall(MatSetUp(B));
4377 
4378   PetscCall(PetscNew(&mumps));
4379 
4380   B->ops->view    = MatView_MUMPS;
4381   B->ops->getinfo = MatGetInfo_MUMPS;
4382 
4383   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4384   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4385   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4386   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4387   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4388   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4389   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4390   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4391   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4392   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4393   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4394   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4395   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4396   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4397   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4398 
4399   if (ftype == MAT_FACTOR_LU) {
4400     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4401     B->factortype            = MAT_FACTOR_LU;
4402     mumps->sym               = 0;
4403   } else {
4404     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4405     B->factortype                  = MAT_FACTOR_CHOLESKY;
4406 #if defined(PETSC_USE_COMPLEX)
4407     mumps->sym = 2;
4408 #else
4409     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4410     else mumps->sym = 2;
4411 #endif
4412   }
4413   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4414   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
4415 
4416   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4417   if (size == 1) {
4418     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4419     B->canuseordering = PETSC_TRUE;
4420   }
4421 
4422   /* set solvertype */
4423   PetscCall(PetscFree(B->solvertype));
4424   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4425   B->ops->destroy = MatDestroy_MUMPS;
4426   B->data         = (void *)mumps;
4427 
4428   *F               = B;
4429   mumps->id.job    = JOB_NULL;
4430   mumps->ICNTL_pre = NULL;
4431   mumps->CNTL_pre  = NULL;
4432   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4433   PetscFunctionReturn(PETSC_SUCCESS);
4434 }
4435 
4436 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4437 {
4438   PetscFunctionBegin;
4439   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4440   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4441   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4442   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4443   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4444   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4445   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4446   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4447   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4448   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4449   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4450   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4451   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4452   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4453   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4454   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4455   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4456   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4457   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4458   PetscFunctionReturn(PETSC_SUCCESS);
4459 }
4460
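/* Usage sketch (illustrative, not part of this source file): once MUMPS is registered as above, a
   direct solve can select it through the standard PETSc options, e.g.
       ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2
   or programmatically (assuming an existing KSP ksp with its operator already set):
       PC  pc;
       Mat F;
       PetscCall(KSPGetPC(ksp, &pc));
       PetscCall(PCSetType(pc, PCLU));
       PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
       PetscCall(PCFactorSetUpMatSolverType(pc));  // create the factor matrix so ICNTL can be set early
       PetscCall(PCFactorGetMatrix(pc, &F));
       PetscCall(MatMumpsSetIcntl(F, 24, 1));      // example: enable null-pivot detection
   The routines and options named here are existing PETSc/MUMPS interfaces; the ICNTL values shown
   are only examples. */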