xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision e60de12f8eb415fb12bea4d986cc0f41d687d6da)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 #include <petsc/private/vecimpl.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15   #include <cmumps_c.h>
16   #include <zmumps_c.h>
17   #include <smumps_c.h>
18   #include <dmumps_c.h>
19 #else
20   #if defined(PETSC_USE_COMPLEX)
21     #if defined(PETSC_USE_REAL_SINGLE)
22       #include <cmumps_c.h>
23       #define MUMPS_c     cmumps_c
24       #define MumpsScalar CMUMPS_COMPLEX
25     #else
26       #include <zmumps_c.h>
27       #define MUMPS_c     zmumps_c
28       #define MumpsScalar ZMUMPS_COMPLEX
29     #endif
30   #else
31     #if defined(PETSC_USE_REAL_SINGLE)
32       #include <smumps_c.h>
33       #define MUMPS_c     smumps_c
34       #define MumpsScalar SMUMPS_REAL
35     #else
36       #include <dmumps_c.h>
37       #define MUMPS_c     dmumps_c
38       #define MumpsScalar DMUMPS_REAL
39     #endif
40   #endif
41 #endif
42 #if defined(PETSC_USE_COMPLEX)
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_STRUC_C CMUMPS_STRUC_C
45   #else
46     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47   #endif
48 #else
49   #if defined(PETSC_USE_REAL_SINGLE)
50     #define MUMPS_STRUC_C SMUMPS_STRUC_C
51   #else
52     #define MUMPS_STRUC_C DMUMPS_STRUC_C
53   #endif
54 #endif
55 EXTERN_C_END
56 
57 #define JOB_INIT         -1
58 #define JOB_NULL         0
59 #define JOB_FACTSYMBOLIC 1
60 #define JOB_FACTNUMERIC  2
61 #define JOB_SOLVE        3
62 #define JOB_END          -2
63 
64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66    naming convention in PetscMPIInt, PetscBLASInt etc.
67 */
68 typedef MUMPS_INT PetscMUMPSInt;
69 
70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
72     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73   #endif
74 #else
75   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
76     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77   #endif
78 #endif
79 
80 #define MPIU_MUMPSINT       MPI_INT
81 #define PETSC_MUMPS_INT_MAX 2147483647
82 #define PETSC_MUMPS_INT_MIN -2147483648
83 
84 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
86 {
87   PetscFunctionBegin;
88 #if PetscDefined(USE_64BIT_INDICES)
89   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
90 #endif
91   *b = (PetscMUMPSInt)a;
92   PetscFunctionReturn(PETSC_SUCCESS);
93 }
94 
95 /* Put these utility routines here since they are only used in this file */
96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97 {
98   PetscInt  myval;
99   PetscBool myset;
100 
101   PetscFunctionBegin;
102   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105   if (set) *set = myset;
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
109 
110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111 //
112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
114 // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
115 //
116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used.
118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
119 typedef struct {
120   PetscPrecision precision;   // precision used by MUMPS
121   void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex
122 
123   // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
124   MUMPS_INT  sym, par, job;
125   MUMPS_INT  comm_fortran; /* Fortran communicator */
126   MUMPS_INT *icntl;
127   void      *cntl; // MumpsReal, fixed size array
128   MUMPS_INT  n;
129   MUMPS_INT  nblk;
130 
131   /* Assembled entry */
132   MUMPS_INT8 nnz;
133   MUMPS_INT *irn;
134   MUMPS_INT *jcn;
135   void      *a; // MumpsScalar, centralized input
136   PetscCount a_len;
137 
138   /* Distributed entry */
139   MUMPS_INT8 nnz_loc;
140   MUMPS_INT *irn_loc;
141   MUMPS_INT *jcn_loc;
142   void      *a_loc; // MumpsScalar, distributed input
143   PetscCount a_loc_len;
144 
145   /* Matrix by blocks */
146   MUMPS_INT *blkptr;
147   MUMPS_INT *blkvar;
148 
149   /* Ordering, if given by user */
150   MUMPS_INT *perm_in;
151 
152   /* RHS, solution, ouptput data and statistics */
153   void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
154   PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar
155 
156   MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
157   MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
158   // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
159   MUMPS_INT  schur_lld;
160   MUMPS_INT *info, *infog;   // fixed size array
161   void      *rinfo, *rinfog; // MumpsReal, fixed size array
162 
163   /* Null space */
164   MUMPS_INT *pivnul_list; // allocated by MUMPS!
165   MUMPS_INT *mapping;     // allocated by MUMPS!
166 
167   /* Schur */
168   MUMPS_INT  size_schur;
169   MUMPS_INT *listvar_schur;
170   void      *schur; // MumpsScalar
171   PetscCount schur_len;
172 
173   /* For out-of-core */
174   char *ooc_tmpdir; // fixed size array
175   char *ooc_prefix; // fixed size array
176 } XMUMPS_STRUC_C;
177 
178 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
179 #define AllocatInternalID(MUMPS_STRUC_T, outer) \
180   do { \
181     MUMPS_STRUC_T *inner; \
182     PetscCall(PetscNew(&inner)); \
183     outer->icntl      = inner->icntl; \
184     outer->cntl       = inner->cntl; \
185     outer->info       = inner->info; \
186     outer->infog      = inner->infog; \
187     outer->rinfo      = inner->rinfo; \
188     outer->rinfog     = inner->rinfog; \
189     outer->ooc_tmpdir = inner->ooc_tmpdir; \
190     outer->ooc_prefix = inner->ooc_prefix; \
191     /* the three field should never change after init */ \
192     inner->comm_fortran = outer->comm_fortran; \
193     inner->par          = outer->par; \
194     inner->sym          = outer->sym; \
195     outer->internal_id  = inner; \
196   } while (0)
197 
198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
200 {
201   PetscFunctionBegin;
202   outer->precision = precision;
203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
204   #if defined(PETSC_USE_COMPLEX)
205   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
206   else AllocatInternalID(ZMUMPS_STRUC_C, outer);
207   #else
208   if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
209   else AllocatInternalID(DMUMPS_STRUC_C, outer);
210   #endif
211 #else
212   AllocatInternalID(MUMPS_STRUC_C, outer);
213 #endif
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
218   do { \
219     MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
220     PetscCall(PetscFree(inner->a)); \
221     PetscCall(PetscFree(inner->a_loc)); \
222     PetscCall(PetscFree(inner->redrhs)); \
223     PetscCall(PetscFree(inner->rhs)); \
224     PetscCall(PetscFree(inner->rhs_sparse)); \
225     PetscCall(PetscFree(inner->rhs_loc)); \
226     PetscCall(PetscFree(inner->sol_loc)); \
227     PetscCall(PetscFree(inner->schur)); \
228   } while (0)
229 
230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
231 {
232   PetscFunctionBegin;
233   if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
235     const PetscPrecision mumps_precision = outer->precision;
236     if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
237   #if defined(PETSC_USE_COMPLEX)
238       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
239       else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
240   #else
241       if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
242       else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
243   #endif
244     }
245 #endif
246     PetscCall(PetscFree(outer->internal_id));
247   }
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255 //
256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259 //
260 //
261 //   Input parameters:
262 // + convert   - whether to do PetscScalar to MumpsScalar conversion
263 // . n         - length of the PetscScalar array
264 // . pa        - [n]], points to the PetscScalar array
265 // . precision - precision of MumpsScalar
266 // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267 // - ma        - on input, an existing MumpsScalar array if any.
268 //
269 //   Output parameters:
270 // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272 //
273 //   Note:
274 //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276 {
277   PetscFunctionBegin;
278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279   const PetscPrecision mumps_precision = precision;
280   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
281   #if defined(PETSC_USE_COMPLEX)
282   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284       if (*m < n) {
285         PetscCall(PetscFree(*ma));
286         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287         *m = n;
288       }
289       if (convert) {
290         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291         for (PetscCount i = 0; i < n; i++) {
292           b[i].r = PetscRealPart(pa[i]);
293           b[i].i = PetscImaginaryPart(pa[i]);
294         }
295       }
296     } else {
297       if (*m < n) {
298         PetscCall(PetscFree(*ma));
299         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300         *m = n;
301       }
302       if (convert) {
303         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304         for (PetscCount i = 0; i < n; i++) {
305           b[i].r = PetscRealPart(pa[i]);
306           b[i].i = PetscImaginaryPart(pa[i]);
307         }
308       }
309     }
310   }
311   #else
312   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314       if (*m < n) {
315         PetscCall(PetscFree(*ma));
316         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317         *m = n;
318       }
319       if (convert) {
320         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322       }
323     } else {
324       if (*m < n) {
325         PetscCall(PetscFree(*ma));
326         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327         *m = n;
328       }
329       if (convert) {
330         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332       }
333     }
334   }
335   #endif
336   else
337 #endif
338   {
339     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340     *ma = (void *)pa;                       // same precision, make them alias
341     *m  = 0;
342   }
343   PetscFunctionReturn(PETSC_SUCCESS);
344 }
345 
346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
347 //
348 // 1) If the two types are different, cast array elements.
349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
351 {
352   PetscFunctionBegin;
353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
354   if (mumps_precision != PETSC_SCALAR_PRECISION) {
355   #if defined(PETSC_USE_COMPLEX)
356     if (mumps_precision == PETSC_PRECISION_SINGLE) {
357       PetscReal         *a = (PetscReal *)pa;
358       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
360     } else {
361       PetscReal         *a = (PetscReal *)pa;
362       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
363       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
364     }
365   #else
366     if (mumps_precision == PETSC_PRECISION_SINGLE) {
367       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
368       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
369     } else {
370       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
371       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
372     }
373   #endif
374   } else
375 #endif
376     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381 //
382 // 1) If the two types are different, cast array elements.
383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
385 {
386   PetscFunctionBegin;
387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
388   if (mumps_precision != PETSC_SCALAR_PRECISION) {
389   #if defined(PETSC_USE_COMPLEX)
390     if (mumps_precision == PETSC_PRECISION_SINGLE) {
391       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
392       for (PetscCount i = 0; i < n; i++) {
393         b[i].r = PetscRealPart(pa[i]);
394         b[i].i = PetscImaginaryPart(pa[i]);
395       }
396     } else {
397       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
398       for (PetscCount i = 0; i < n; i++) {
399         b[i].r = PetscRealPart(pa[i]);
400         b[i].i = PetscImaginaryPart(pa[i]);
401       }
402     }
403   #else
404     if (mumps_precision == PETSC_PRECISION_SINGLE) {
405       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
406       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
407     } else {
408       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
409       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
410     }
411   #endif
412   } else
413 #endif
414     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
417 
418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419 {
420   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421 }
422 
423 #define PreMumpsCall(inner, outer, mumpsscalar) \
424   do { \
425     inner->job           = outer->job; \
426     inner->n             = outer->n; \
427     inner->nblk          = outer->nblk; \
428     inner->nnz           = outer->nnz; \
429     inner->irn           = outer->irn; \
430     inner->jcn           = outer->jcn; \
431     inner->a             = (mumpsscalar *)outer->a; \
432     inner->nnz_loc       = outer->nnz_loc; \
433     inner->irn_loc       = outer->irn_loc; \
434     inner->jcn_loc       = outer->jcn_loc; \
435     inner->a_loc         = (mumpsscalar *)outer->a_loc; \
436     inner->blkptr        = outer->blkptr; \
437     inner->blkvar        = outer->blkvar; \
438     inner->perm_in       = outer->perm_in; \
439     inner->rhs           = (mumpsscalar *)outer->rhs; \
440     inner->redrhs        = (mumpsscalar *)outer->redrhs; \
441     inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
442     inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
443     inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
444     inner->irhs_sparse   = outer->irhs_sparse; \
445     inner->irhs_ptr      = outer->irhs_ptr; \
446     inner->isol_loc      = outer->isol_loc; \
447     inner->irhs_loc      = outer->irhs_loc; \
448     inner->nrhs          = outer->nrhs; \
449     inner->lrhs          = outer->lrhs; \
450     inner->lredrhs       = outer->lredrhs; \
451     inner->nz_rhs        = outer->nz_rhs; \
452     inner->lsol_loc      = outer->lsol_loc; \
453     inner->nloc_rhs      = outer->nloc_rhs; \
454     inner->lrhs_loc      = outer->lrhs_loc; \
455     inner->schur_lld     = outer->schur_lld; \
456     inner->size_schur    = outer->size_schur; \
457     inner->listvar_schur = outer->listvar_schur; \
458     inner->schur         = (mumpsscalar *)outer->schur; \
459   } while (0)
460 
461 #define PostMumpsCall(inner, outer) \
462   do { \
463     outer->pivnul_list = inner->pivnul_list; \
464     outer->mapping     = inner->mapping; \
465   } while (0)
466 
467 // Entry for PETSc to call mumps
468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
469 {
470   PetscFunctionBegin;
471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
472   #if defined(PETSC_USE_COMPLEX)
473   if (outer->precision == PETSC_PRECISION_SINGLE) {
474     CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
475     PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
476     PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
477     PostMumpsCall(inner, outer);
478   } else {
479     ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
480     PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
481     PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
482     PostMumpsCall(inner, outer);
483   }
484   #else
485   if (outer->precision == PETSC_PRECISION_SINGLE) {
486     SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
487     PreMumpsCall(inner, outer, SMUMPS_REAL);
488     PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
489     PostMumpsCall(inner, outer);
490   } else {
491     DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
492     PreMumpsCall(inner, outer, DMUMPS_REAL);
493     PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
494     PostMumpsCall(inner, outer);
495   }
496   #endif
497 #else
498   MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
499   PreMumpsCall(inner, outer, MumpsScalar);
500   PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
501   PostMumpsCall(inner, outer);
502 #endif
503   PetscFunctionReturn(PETSC_SUCCESS);
504 }
505 
506 /* macros s.t. indices match MUMPS documentation */
507 #define ICNTL(I) icntl[(I) - 1]
508 #define INFOG(I) infog[(I) - 1]
509 #define INFO(I)  info[(I) - 1]
510 
511 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
512 #if defined(PETSC_USE_COMPLEX)
513   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
514 #else
515   #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
516 #endif
517 
518 // Get a value from MumpsReal arrays. The value is convertible to PetscReal.
519 #define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
521 #define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])
522 
523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
524 #define ID_CNTL_SET(ID, I, VAL) \
525   do { \
526     if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
527     else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
528   } while (0)
529 
530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
531 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
532   #define PetscMUMPS_c(mumps) \
533     do { \
534       if (mumps->use_petsc_omp_support) { \
535         if (mumps->is_omp_master) { \
536           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
537           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
538           PetscCall(PetscCallMumps_Private(&mumps->id)); \
539           PetscCall(PetscFPTrapPop()); \
540           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
541         } \
542         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
543         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
544          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
545          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
546          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
547       */ \
548         MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
549         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
550         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
551         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
552         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
553       } else { \
554         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
555         PetscCall(PetscCallMumps_Private(&mumps->id)); \
556         PetscCall(PetscFPTrapPop()); \
557       } \
558     } while (0)
559 #else
560   #define PetscMUMPS_c(mumps) \
561     do { \
562       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
563       PetscCall(PetscCallMumps_Private(&mumps->id)); \
564       PetscCall(PetscFPTrapPop()); \
565     } while (0)
566 #endif
567 
568 typedef struct Mat_MUMPS Mat_MUMPS;
569 struct Mat_MUMPS {
570   XMUMPS_STRUC_C id;
571 
572   MatStructure   matstruc;
573   PetscMPIInt    myid, petsc_size;
574   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
575   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
576   PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
577   PetscMUMPSInt  sym;
578   MPI_Comm       mumps_comm;
579   PetscMUMPSInt *ICNTL_pre;
580   PetscReal     *CNTL_pre;
581   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
582   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
583   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
584   PetscMUMPSInt  ICNTL26;
585   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
586 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
587   PetscInt    *rhs_nrow, max_nrhs;
588   PetscMPIInt *rhs_recvcounts, *rhs_disps;
589   PetscScalar *rhs_loc, *rhs_recvbuf;
590 #endif
591   Vec            b_seq, x_seq;
592   PetscInt       ninfo, *info; /* which INFO to display */
593   PetscInt       sizeredrhs;
594   PetscScalar   *schur_sol;
595   PetscInt       schur_sizesol;
596   PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
597   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
598   PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
599   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
600 
601   /* Support for MATNEST */
602   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
603   PetscCount  *nest_vals_start;
604   PetscScalar *nest_vals;
605 
606   /* stuff used by petsc/mumps OpenMP support*/
607   PetscBool    use_petsc_omp_support;
608   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
609   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
610   PetscCount  *recvcount;            /* a collection of nnz on omp_master */
611   PetscMPIInt  tag, omp_comm_size;
612   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
613   MPI_Request *reqs;
614 };
615 
616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
617    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
618  */
619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
620 {
621   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */
622 
623   PetscFunctionBegin;
624 #if defined(PETSC_USE_64BIT_INDICES)
625   {
626     PetscInt i;
627     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
628       PetscCall(PetscFree(mumps->ia_alloc));
629       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
630       mumps->cur_ilen = nrow + 1;
631     }
632     if (nnz > mumps->cur_jlen) {
633       PetscCall(PetscFree(mumps->ja_alloc));
634       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
635       mumps->cur_jlen = nnz;
636     }
637     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
638     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
639     *ia_mumps = mumps->ia_alloc;
640     *ja_mumps = mumps->ja_alloc;
641   }
642 #else
643   *ia_mumps = ia;
644   *ja_mumps = ja;
645 #endif
646   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651 {
652   PetscFunctionBegin;
653   PetscCall(PetscFree(mumps->id.listvar_schur));
654   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655   PetscCall(PetscFree(mumps->schur_sol));
656   mumps->id.size_schur = 0;
657   mumps->id.schur_lld  = 0;
658   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built
659   PetscFunctionReturn(PETSC_SUCCESS);
660 }
661 
662 /* solve with rhs in mumps->id.redrhs and return in the same location */
663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
664 {
665   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
666   Mat                  S, B, X; // solve S*X = B; all three matrices are dense
667   MatFactorSchurStatus schurstatus;
668   PetscInt             sizesol;
669   const PetscScalar   *xarray;
670 
671   PetscFunctionBegin;
672   PetscCall(MatFactorFactorizeSchurComplement(F));
673   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
674   PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
675 
676   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
677   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
679   PetscCall(MatBindToCPU(B, S->boundtocpu));
680 #endif
681   switch (schurstatus) {
682   case MAT_FACTOR_SCHUR_FACTORED:
683     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
684     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
686     PetscCall(MatBindToCPU(X, S->boundtocpu));
687 #endif
688     if (!mumps->id.ICNTL(9)) { /* transpose solve */
689       PetscCall(MatMatSolveTranspose(S, B, X));
690     } else {
691       PetscCall(MatMatSolve(S, B, X));
692     }
693     break;
694   case MAT_FACTOR_SCHUR_INVERTED:
695     sizesol = mumps->id.nrhs * mumps->id.size_schur;
696     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
697       PetscCall(PetscFree(mumps->schur_sol));
698       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
699       mumps->schur_sizesol = sizesol;
700     }
701     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
702     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
704     PetscCall(MatBindToCPU(X, S->boundtocpu));
705 #endif
706     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
707     if (!mumps->id.ICNTL(9)) { /* transpose solve */
708       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
709     } else {
710       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
711     }
712     PetscCall(MatProductSetFromOptions(X));
713     PetscCall(MatProductSymbolic(X));
714     PetscCall(MatProductNumeric(X));
715 
716     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
717     break;
718   default:
719     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
720   }
721   // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
722   PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
723   PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
724   PetscCall(MatDenseRestoreArrayRead(X, &xarray));
725   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
726   PetscCall(MatDestroy(&B));
727   PetscCall(MatDestroy(&X));
728   PetscFunctionReturn(PETSC_SUCCESS);
729 }
730 
731 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
732 {
733   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
734 
735   PetscFunctionBegin;
736   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
737     PetscFunctionReturn(PETSC_SUCCESS);
738   }
739   if (!expansion) { /* prepare for the condensation step */
740     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
741     /* allocate MUMPS internal array to store reduced right-hand sides */
742     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
743       mumps->id.lredrhs = mumps->id.size_schur;
744       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
745       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
746       PetscCall(PetscFree(mumps->redrhs));
747       PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
748       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
749     }
750   } else {                                    /* prepare for the expansion step */
751     PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
752     mumps->id.ICNTL(26) = 2;                  /* expansion phase */
753     PetscMUMPS_c(mumps);
754     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
755     /* restore defaults */
756     mumps->id.ICNTL(26) = -1;
757     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
758     if (mumps->id.nrhs > 1) {
759       if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
760       PetscCall(PetscFree(mumps->redrhs));
761       mumps->id.redrhs_len = 0;
762       mumps->id.lredrhs    = 0;
763       mumps->sizeredrhs    = 0;
764     }
765   }
766   PetscFunctionReturn(PETSC_SUCCESS);
767 }
768 
769 /*
770   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
771 
772   input:
773     A       - matrix in aij,baij or sbaij format
774     shift   - 0: C style output triple; 1: Fortran style output triple.
775     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776               MAT_REUSE_MATRIX:   only the values in v array are updated
777   output:
778     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779     r, c, v - row and col index, matrix values (matrix triples)
780 
781   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
782   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
783   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784 
785  */
786 
787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788 {
789   const PetscScalar *av;
790   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791   PetscCount         nz, rnz, k;
792   PetscMUMPSInt     *row, *col;
793   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
794 
795   PetscFunctionBegin;
796   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797   if (reuse == MAT_INITIAL_MATRIX) {
798     nz = aa->nz;
799     ai = aa->i;
800     aj = aa->j;
801     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802     for (PetscCount i = k = 0; i < M; i++) {
803       rnz = ai[i + 1] - ai[i];
804       ajj = aj + ai[i];
805       for (PetscCount j = 0; j < rnz; j++) {
806         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808         k++;
809       }
810     }
811     mumps->val = (PetscScalar *)av;
812     mumps->irn = row;
813     mumps->jcn = col;
814     mumps->nnz = nz;
815   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818   PetscFunctionReturn(PETSC_SUCCESS);
819 }
820 
821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
822 {
823   PetscCount     nz, i, j, k, r;
824   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
825   PetscMUMPSInt *row, *col;
826 
827   PetscFunctionBegin;
828   nz = a->sliidx[a->totalslices];
829   if (reuse == MAT_INITIAL_MATRIX) {
830     PetscCall(PetscMalloc2(nz, &row, nz, &col));
831     for (i = k = 0; i < a->totalslices; i++) {
832       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
833     }
834     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
835     mumps->irn = row;
836     mumps->jcn = col;
837     mumps->nnz = nz;
838     mumps->val = a->val;
839   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
840   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845 {
846   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
847   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849   PetscInt        bs;
850   PetscMUMPSInt  *row, *col;
851 
852   PetscFunctionBegin;
853   if (reuse == MAT_INITIAL_MATRIX) {
854     PetscCall(MatGetBlockSize(A, &bs));
855     M  = A->rmap->N / bs;
856     ai = aa->i;
857     aj = aa->j;
858     PetscCall(PetscMalloc2(nz, &row, nz, &col));
859     for (i = 0; i < M; i++) {
860       ajj = aj + ai[i];
861       rnz = ai[i + 1] - ai[i];
862       for (k = 0; k < rnz; k++) {
863         for (j = 0; j < bs; j++) {
864           for (m = 0; m < bs; m++) {
865             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867             idx++;
868           }
869         }
870       }
871     }
872     mumps->irn = row;
873     mumps->jcn = col;
874     mumps->nnz = nz;
875     mumps->val = aa->a;
876   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882 {
883   const PetscInt *ai, *aj, *ajj;
884   PetscInt        bs;
885   PetscCount      nz, rnz, i, j, k, m;
886   PetscMUMPSInt  *row, *col;
887   PetscScalar    *val;
888   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool isset, hermitian;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
898 #endif
899   ai = aa->i;
900   aj = aa->j;
901   PetscCall(MatGetBlockSize(A, &bs));
902   if (reuse == MAT_INITIAL_MATRIX) {
903     const PetscCount alloc_size = aa->nz * bs2;
904 
905     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906     if (bs > 1) {
907       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908       mumps->val = mumps->val_alloc;
909     } else {
910       mumps->val = aa->a;
911     }
912     mumps->irn = row;
913     mumps->jcn = col;
914   } else {
915     row = mumps->irn;
916     col = mumps->jcn;
917   }
918   val = mumps->val;
919 
920   nz = 0;
921   if (bs > 1) {
922     for (i = 0; i < mbs; i++) {
923       rnz = ai[i + 1] - ai[i];
924       ajj = aj + ai[i];
925       for (j = 0; j < rnz; j++) {
926         for (k = 0; k < bs; k++) {
927           for (m = 0; m < bs; m++) {
928             if (ajj[j] > i || k >= m) {
929               if (reuse == MAT_INITIAL_MATRIX) {
930                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932               }
933               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934             }
935           }
936         }
937       }
938     }
939   } else if (reuse == MAT_INITIAL_MATRIX) {
940     for (i = 0; i < mbs; i++) {
941       rnz = ai[i + 1] - ai[i];
942       ajj = aj + ai[i];
943       for (j = 0; j < rnz; j++) {
944         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946         nz++;
947       }
948     }
949     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950   } else if (mumps->nest_vals)
951     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958 {
959   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960   PetscCount         nz, rnz, i, j;
961   const PetscScalar *av, *v1;
962   PetscScalar       *val;
963   PetscMUMPSInt     *row, *col;
964   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965   PetscBool          diagDense;
966 #if defined(PETSC_USE_COMPLEX)
967   PetscBool hermitian, isset;
968 #endif
969 
970   PetscFunctionBegin;
971 #if defined(PETSC_USE_COMPLEX)
972   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
974 #endif
975   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976   ai = aa->i;
977   aj = aa->j;
978   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979   if (reuse == MAT_INITIAL_MATRIX) {
980     /* count nz in the upper triangular part of A */
981     nz = 0;
982     if (!diagDense) {
983       for (i = 0; i < M; i++) {
984         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985           for (j = ai[i]; j < ai[i + 1]; j++) {
986             if (aj[j] < i) continue;
987             nz++;
988           }
989         } else {
990           nz += ai[i + 1] - adiag[i];
991         }
992       }
993     } else {
994       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995     }
996     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997     PetscCall(PetscMalloc1(nz, &val));
998     mumps->nnz = nz;
999     mumps->irn = row;
1000     mumps->jcn = col;
1001     mumps->val = mumps->val_alloc = val;
1002 
1003     nz = 0;
1004     if (!diagDense) {
1005       for (i = 0; i < M; i++) {
1006         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007           for (j = ai[i]; j < ai[i + 1]; j++) {
1008             if (aj[j] < i) continue;
1009             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011             val[nz] = av[j];
1012             nz++;
1013           }
1014         } else {
1015           rnz = ai[i + 1] - adiag[i];
1016           ajj = aj + adiag[i];
1017           v1  = av + adiag[i];
1018           for (j = 0; j < rnz; j++) {
1019             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021             val[nz++] = v1[j];
1022           }
1023         }
1024       }
1025     } else {
1026       for (i = 0; i < M; i++) {
1027         rnz = ai[i + 1] - adiag[i];
1028         ajj = aj + adiag[i];
1029         v1  = av + adiag[i];
1030         for (j = 0; j < rnz; j++) {
1031           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033           val[nz++] = v1[j];
1034         }
1035       }
1036     }
1037   } else {
1038     nz  = 0;
1039     val = mumps->val;
1040     if (!diagDense) {
1041       for (i = 0; i < M; i++) {
1042         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043           for (j = ai[i]; j < ai[i + 1]; j++) {
1044             if (aj[j] < i) continue;
1045             val[nz++] = av[j];
1046           }
1047         } else {
1048           rnz = ai[i + 1] - adiag[i];
1049           v1  = av + adiag[i];
1050           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051         }
1052       }
1053     } else {
1054       for (i = 0; i < M; i++) {
1055         rnz = ai[i + 1] - adiag[i];
1056         v1  = av + adiag[i];
1057         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058       }
1059     }
1060   }
1061   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066 {
1067   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068   PetscInt           bs;
1069   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070   PetscMUMPSInt     *row, *col;
1071   const PetscScalar *av, *bv, *v1, *v2;
1072   PetscScalar       *val;
1073   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077 #if defined(PETSC_USE_COMPLEX)
1078   PetscBool hermitian, isset;
1079 #endif
1080 
1081   PetscFunctionBegin;
1082 #if defined(PETSC_USE_COMPLEX)
1083   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1085 #endif
1086   PetscCall(MatGetBlockSize(A, &bs));
1087   rstart = A->rmap->rstart;
1088   ai     = aa->i;
1089   aj     = aa->j;
1090   bi     = bb->i;
1091   bj     = bb->j;
1092   av     = aa->a;
1093   bv     = bb->a;
1094 
1095   garray = mat->garray;
1096 
1097   if (reuse == MAT_INITIAL_MATRIX) {
1098     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100     PetscCall(PetscMalloc1(nz, &val));
1101     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1102     mumps->irn = row;
1103     mumps->jcn = col;
1104     mumps->val = mumps->val_alloc = val;
1105   } else {
1106     val = mumps->val;
1107   }
1108 
1109   jj   = 0;
1110   irow = rstart;
1111   for (i = 0; i < mbs; i++) {
1112     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113     countA = ai[i + 1] - ai[i];
1114     countB = bi[i + 1] - bi[i];
1115     bjj    = bj + bi[i];
1116     v1     = av + ai[i] * bs2;
1117     v2     = bv + bi[i] * bs2;
1118 
1119     if (bs > 1) {
1120       /* A-part */
1121       for (j = 0; j < countA; j++) {
1122         for (k = 0; k < bs; k++) {
1123           for (m = 0; m < bs; m++) {
1124             if (rstart + ajj[j] * bs > irow || k >= m) {
1125               if (reuse == MAT_INITIAL_MATRIX) {
1126                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128               }
1129               val[jj++] = v1[j * bs2 + m + k * bs];
1130             }
1131           }
1132         }
1133       }
1134 
1135       /* B-part */
1136       for (j = 0; j < countB; j++) {
1137         for (k = 0; k < bs; k++) {
1138           for (m = 0; m < bs; m++) {
1139             if (reuse == MAT_INITIAL_MATRIX) {
1140               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142             }
1143             val[jj++] = v2[j * bs2 + m + k * bs];
1144           }
1145         }
1146       }
1147     } else {
1148       /* A-part */
1149       for (j = 0; j < countA; j++) {
1150         if (reuse == MAT_INITIAL_MATRIX) {
1151           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153         }
1154         val[jj++] = v1[j];
1155       }
1156 
1157       /* B-part */
1158       for (j = 0; j < countB; j++) {
1159         if (reuse == MAT_INITIAL_MATRIX) {
1160           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162         }
1163         val[jj++] = v2[j];
1164       }
1165     }
1166     irow += bs;
1167   }
1168   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173 {
1174   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176   PetscMUMPSInt     *row, *col;
1177   const PetscScalar *av, *bv, *v1, *v2;
1178   PetscScalar       *val;
1179   Mat                Ad, Ao;
1180   Mat_SeqAIJ        *aa;
1181   Mat_SeqAIJ        *bb;
1182 
1183   PetscFunctionBegin;
1184   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1187 
1188   aa = (Mat_SeqAIJ *)Ad->data;
1189   bb = (Mat_SeqAIJ *)Ao->data;
1190   ai = aa->i;
1191   aj = aa->j;
1192   bi = bb->i;
1193   bj = bb->j;
1194 
1195   rstart = A->rmap->rstart;
1196   cstart = A->cmap->rstart;
1197 
1198   if (reuse == MAT_INITIAL_MATRIX) {
1199     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201     PetscCall(PetscMalloc1(nz, &val));
1202     mumps->nnz = nz;
1203     mumps->irn = row;
1204     mumps->jcn = col;
1205     mumps->val = mumps->val_alloc = val;
1206   } else {
1207     val = mumps->val;
1208   }
1209 
1210   jj   = 0;
1211   irow = rstart;
1212   for (i = 0; i < m; i++) {
1213     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1214     countA = ai[i + 1] - ai[i];
1215     countB = bi[i + 1] - bi[i];
1216     bjj    = bj + bi[i];
1217     v1     = av + ai[i];
1218     v2     = bv + bi[i];
1219 
1220     /* A-part */
1221     for (j = 0; j < countA; j++) {
1222       if (reuse == MAT_INITIAL_MATRIX) {
1223         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225       }
1226       val[jj++] = v1[j];
1227     }
1228 
1229     /* B-part */
1230     for (j = 0; j < countB; j++) {
1231       if (reuse == MAT_INITIAL_MATRIX) {
1232         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234       }
1235       val[jj++] = v2[j];
1236     }
1237     irow++;
1238   }
1239   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1245 {
1246   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
1247   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
1248   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1249   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
1250   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
1251   const PetscInt     bs2 = mat->bs2;
1252   PetscInt           bs;
1253   PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
1254   PetscMUMPSInt     *row, *col;
1255   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
1256   PetscScalar       *val;
1257 
1258   PetscFunctionBegin;
1259   PetscCall(MatGetBlockSize(A, &bs));
1260   if (reuse == MAT_INITIAL_MATRIX) {
1261     nz = bs2 * (aa->nz + bb->nz);
1262     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1263     PetscCall(PetscMalloc1(nz, &val));
1264     mumps->nnz = nz;
1265     mumps->irn = row;
1266     mumps->jcn = col;
1267     mumps->val = mumps->val_alloc = val;
1268   } else {
1269     val = mumps->val;
1270   }
1271 
1272   jj   = 0;
1273   irow = rstart;
1274   for (i = 0; i < mbs; i++) {
1275     countA = ai[i + 1] - ai[i];
1276     countB = bi[i + 1] - bi[i];
1277     ajj    = aj + ai[i];
1278     bjj    = bj + bi[i];
1279     v1     = av + bs2 * ai[i];
1280     v2     = bv + bs2 * bi[i];
1281 
1282     idx = 0;
1283     /* A-part */
1284     for (k = 0; k < countA; k++) {
1285       for (j = 0; j < bs; j++) {
1286         for (n = 0; n < bs; n++) {
1287           if (reuse == MAT_INITIAL_MATRIX) {
1288             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1289             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
1290           }
1291           val[jj++] = v1[idx++];
1292         }
1293       }
1294     }
1295 
1296     idx = 0;
1297     /* B-part */
1298     for (k = 0; k < countB; k++) {
1299       for (j = 0; j < bs; j++) {
1300         for (n = 0; n < bs; n++) {
1301           if (reuse == MAT_INITIAL_MATRIX) {
1302             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
1303             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
1304           }
1305           val[jj++] = v2[idx++];
1306         }
1307       }
1308     }
1309     irow += bs;
1310   }
1311   PetscFunctionReturn(PETSC_SUCCESS);
1312 }
1313 
1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315 {
1316   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318   PetscMUMPSInt     *row, *col;
1319   const PetscScalar *av, *bv, *v1, *v2;
1320   PetscScalar       *val;
1321   Mat                Ad, Ao;
1322   Mat_SeqAIJ        *aa;
1323   Mat_SeqAIJ        *bb;
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscBool hermitian, isset;
1326 #endif
1327 
1328   PetscFunctionBegin;
1329 #if defined(PETSC_USE_COMPLEX)
1330   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1332 #endif
1333   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1336 
1337   aa = (Mat_SeqAIJ *)Ad->data;
1338   bb = (Mat_SeqAIJ *)Ao->data;
1339   ai = aa->i;
1340   aj = aa->j;
1341   bi = bb->i;
1342   bj = bb->j;
1343   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344   rstart = A->rmap->rstart;
1345 
1346   if (reuse == MAT_INITIAL_MATRIX) {
1347     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1348     nzb = 0; /* num of upper triangular entries in mat->B */
1349     for (i = 0; i < m; i++) {
1350       nza += (ai[i + 1] - adiag[i]);
1351       countB = bi[i + 1] - bi[i];
1352       bjj    = bj + bi[i];
1353       for (j = 0; j < countB; j++) {
1354         if (garray[bjj[j]] > rstart) nzb++;
1355       }
1356     }
1357 
1358     nz = nza + nzb; /* total nz of upper triangular part of mat */
1359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360     PetscCall(PetscMalloc1(nz, &val));
1361     mumps->nnz = nz;
1362     mumps->irn = row;
1363     mumps->jcn = col;
1364     mumps->val = mumps->val_alloc = val;
1365   } else {
1366     val = mumps->val;
1367   }
1368 
1369   jj   = 0;
1370   irow = rstart;
1371   for (i = 0; i < m; i++) {
1372     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373     v1     = av + adiag[i];
1374     countA = ai[i + 1] - adiag[i];
1375     countB = bi[i + 1] - bi[i];
1376     bjj    = bj + bi[i];
1377     v2     = bv + bi[i];
1378 
1379     /* A-part */
1380     for (j = 0; j < countA; j++) {
1381       if (reuse == MAT_INITIAL_MATRIX) {
1382         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384       }
1385       val[jj++] = v1[j];
1386     }
1387 
1388     /* B-part */
1389     for (j = 0; j < countB; j++) {
1390       if (garray[bjj[j]] > rstart) {
1391         if (reuse == MAT_INITIAL_MATRIX) {
1392           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394         }
1395         val[jj++] = v2[j];
1396       }
1397     }
1398     irow++;
1399   }
1400   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
1404 
1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406 {
1407   const PetscScalar *av;
1408   const PetscInt     M = A->rmap->n;
1409   PetscCount         i;
1410   PetscMUMPSInt     *row, *col;
1411   Vec                v;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415   PetscCall(VecGetArrayRead(v, &av));
1416   if (reuse == MAT_INITIAL_MATRIX) {
1417     PetscCall(PetscMalloc2(M, &row, M, &col));
1418     for (i = 0; i < M; i++) {
1419       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420       col[i] = row[i];
1421     }
1422     mumps->val = (PetscScalar *)av;
1423     mumps->irn = row;
1424     mumps->jcn = col;
1425     mumps->nnz = M;
1426   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428   PetscCall(VecRestoreArrayRead(v, &av));
1429   PetscFunctionReturn(PETSC_SUCCESS);
1430 }
1431 
1432 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1433 {
1434   PetscScalar   *v;
1435   const PetscInt m = A->rmap->n, N = A->cmap->N;
1436   PetscInt       lda;
1437   PetscCount     i, j;
1438   PetscMUMPSInt *row, *col;
1439 
1440   PetscFunctionBegin;
1441   PetscCall(MatDenseGetArray(A, &v));
1442   PetscCall(MatDenseGetLDA(A, &lda));
1443   if (reuse == MAT_INITIAL_MATRIX) {
1444     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1445     for (i = 0; i < m; i++) {
1446       col[i] = 0;
1447       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1448     }
1449     for (j = 1; j < N; j++) {
1450       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1451       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1452     }
1453     if (lda == m) mumps->val = v;
1454     else {
1455       PetscCall(PetscMalloc1(m * N, &mumps->val));
1456       mumps->val_alloc = mumps->val;
1457       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1458     }
1459     mumps->irn = row;
1460     mumps->jcn = col;
1461     mumps->nnz = m * N;
1462   } else {
1463     if (lda == m && !mumps->nest_vals) mumps->val = v;
1464     else {
1465       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1466     }
1467   }
1468   PetscCall(MatDenseRestoreArray(A, &v));
1469   PetscFunctionReturn(PETSC_SUCCESS);
1470 }
1471 
1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
1474 // and its rows and columns permuted
1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477 {
1478   Mat         A;
1479   PetscScalar s[2];
1480   PetscBool   isTrans, isHTrans, compare;
1481 
1482   PetscFunctionBegin;
1483   do {
1484     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485     if (isTrans) {
1486       PetscCall(MatTransposeGetMat(*sub, &A));
1487       isHTrans = PETSC_FALSE;
1488     } else {
1489       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491     }
1492     compare = (PetscBool)(isTrans || isHTrans);
1493     if (compare) {
1494       if (vshift && vscale) {
1495         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496         if (!*conjugate) {
1497           *vshift += s[0] * *vscale;
1498           *vscale *= s[1];
1499         } else {
1500           *vshift += PetscConj(s[0]) * *vscale;
1501           *vscale *= PetscConj(s[1]);
1502         }
1503       }
1504       if (swap) *swap = (PetscBool)!*swap;
1505       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506       *sub = A;
1507     }
1508   } while (compare);
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1513 {
1514   Mat     **mats;
1515   PetscInt  nr, nc;
1516   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1517 
1518   PetscFunctionBegin;
1519   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1520   if (reuse == MAT_INITIAL_MATRIX) {
1521     PetscMUMPSInt *irns, *jcns;
1522     PetscScalar   *vals;
1523     PetscCount     totnnz, cumnnz, maxnnz;
1524     PetscInt      *pjcns_w, Mbs = 0;
1525     IS            *rows, *cols;
1526     PetscInt     **rows_idx, **cols_idx;
1527 
1528     cumnnz = 0;
1529     maxnnz = 0;
1530     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1531     for (PetscInt r = 0; r < nr; r++) {
1532       for (PetscInt c = 0; c < nc; c++) {
1533         Mat sub = mats[r][c];
1534 
1535         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1536         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1537         if (sub) {
1538           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1539           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1540           MatInfo   info;
1541 
1542           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1543           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1544           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1545           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1546           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1547           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1548           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1549           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1550           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1551 
1552           if (chol) {
1553             if (r == c) {
1554               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1555               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1556               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1557               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1558               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560             } else {
1561               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1562               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1563               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1564               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1565               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1566               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1567             }
1568           } else {
1569             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1570             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1571             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1572             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1573             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1574             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1575           }
1576           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1577           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1578           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1579           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1580           maxnnz = PetscMax(maxnnz, info.nz_used);
1581         }
1582       }
1583     }
1584 
1585     /* Allocate total COO */
1586     totnnz = cumnnz;
1587     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1588     PetscCall(PetscMalloc1(totnnz, &vals));
1589 
1590     /* Handle rows and column maps
1591        We directly map rows and use an SF for the columns */
1592     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1593     PetscCall(MatNestGetISs(A, rows, cols));
1594     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1595     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1596     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1597     else (void)maxnnz;
1598 
1599     cumnnz = 0;
1600     for (PetscInt r = 0; r < nr; r++) {
1601       for (PetscInt c = 0; c < nc; c++) {
1602         Mat             sub    = mats[r][c];
1603         const PetscInt *ridx   = rows_idx[r];
1604         const PetscInt *cidx   = cols_idx[c];
1605         PetscScalar     vscale = 1.0, vshift = 0.0;
1606         PetscInt        rst, size, bs;
1607         PetscSF         csf;
1608         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1609         PetscLayout     cmap;
1610         PetscInt        innz;
1611 
1612         mumps->nest_vals_start[r * nc + c] = cumnnz;
1613         if (c == r) {
1614           PetscCall(ISGetSize(rows[r], &size));
1615           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1616             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1617           }
1618           PetscCall(MatGetBlockSize(sub, &bs));
1619           Mbs += size / bs;
1620         }
1621         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1622 
1623         /* Extract inner blocks if needed */
1624         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1625         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1626 
1627         /* Get column layout to map off-process columns */
1628         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1629 
1630         /* Get row start to map on-process rows */
1631         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1632 
1633         /* Directly use the mumps datastructure and use C ordering for now */
1634         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1635 
1636         /* Swap the role of rows and columns indices for transposed blocks
1637            since we need values with global final ordering */
1638         if (swap) {
1639           cidx = rows_idx[r];
1640           ridx = cols_idx[c];
1641         }
1642 
1643         /* Communicate column indices
1644            This could have been done with a single SF but it would have complicated the code a lot.
1645            But since we do it only once, we pay the price of setting up an SF for each block */
1646         if (PetscDefined(USE_64BIT_INDICES)) {
1647           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1648         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1649         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1650         PetscCall(PetscIntCast(mumps->nnz, &innz));
1651         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1652         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1653         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1654         PetscCall(PetscSFDestroy(&csf));
1655 
1656         /* Import indices: use direct map for rows and mapped indices for columns */
1657         if (swap) {
1658           for (PetscInt k = 0; k < mumps->nnz; k++) {
1659             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1660             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1661           }
1662         } else {
1663           for (PetscInt k = 0; k < mumps->nnz; k++) {
1664             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1665             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1666           }
1667         }
1668 
1669         /* Import values to full COO */
1670         if (conjugate) { /* conjugate the entries */
1671           PetscScalar *v = vals + cumnnz;
1672           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1673         } else if (vscale != 1.0) {
1674           PetscScalar *v = vals + cumnnz;
1675           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1676         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1677 
1678         /* Shift new starting point and sanity check */
1679         cumnnz += mumps->nnz;
1680         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1681 
1682         /* Free scratch memory */
1683         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1684         PetscCall(PetscFree(mumps->val_alloc));
1685         mumps->val = NULL;
1686         mumps->nnz = 0;
1687       }
1688     }
1689     if (mumps->id.ICNTL(15) == 1) {
1690       if (Mbs != A->rmap->N) {
1691         PetscMPIInt rank, size;
1692 
1693         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1694         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1695         if (rank == 0) {
1696           PetscInt shift = 0;
1697 
1698           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1699           PetscCall(PetscFree(mumps->id.blkptr));
1700           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1701           mumps->id.blkptr[0] = 1;
1702           for (PetscInt i = 0; i < size; ++i) {
1703             for (PetscInt r = 0; r < nr; r++) {
1704               Mat             sub = mats[r][r];
1705               const PetscInt *ranges;
1706               PetscInt        bs;
1707 
1708               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
1709               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1710               PetscCall(MatGetBlockSize(sub, &bs));
1711               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1712               shift += (ranges[i + 1] - ranges[i]) / bs;
1713             }
1714           }
1715         }
1716       } else mumps->id.ICNTL(15) = 0;
1717     }
1718     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1719     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1720     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1721     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1722     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1723     mumps->nest_vals_start[nr * nc] = cumnnz;
1724 
1725     /* Set pointers for final MUMPS data structure */
1726     mumps->nest_vals = vals;
1727     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1728     mumps->val       = vals;
1729     mumps->irn       = irns;
1730     mumps->jcn       = jcns;
1731     mumps->nnz       = cumnnz;
1732   } else {
1733     PetscScalar *oval = mumps->nest_vals;
1734     for (PetscInt r = 0; r < nr; r++) {
1735       for (PetscInt c = 0; c < nc; c++) {
1736         PetscBool   conjugate = PETSC_FALSE;
1737         Mat         sub       = mats[r][c];
1738         PetscScalar vscale = 1.0, vshift = 0.0;
1739         PetscInt    midx = r * nc + c;
1740 
1741         if (!mumps->nest_convert_to_triples[midx]) continue;
1742         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1743         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1744         mumps->val = oval + mumps->nest_vals_start[midx];
1745         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1746         if (conjugate) {
1747           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1748           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1749         } else if (vscale != 1.0) {
1750           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1751           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1752         }
1753       }
1754     }
1755     mumps->val = oval;
1756   }
1757   PetscFunctionReturn(PETSC_SUCCESS);
1758 }
1759 
1760 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1761 {
1762   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1763 
1764   PetscFunctionBegin;
1765   PetscCall(PetscFree(mumps->id.isol_loc));
1766   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1767   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1768   PetscCall(VecDestroy(&mumps->b_seq));
1769   PetscCall(VecDestroy(&mumps->x_seq));
1770   PetscCall(PetscFree(mumps->id.perm_in));
1771   PetscCall(PetscFree(mumps->id.blkvar));
1772   PetscCall(PetscFree(mumps->id.blkptr));
1773   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1774   PetscCall(PetscFree(mumps->val_alloc));
1775   PetscCall(PetscFree(mumps->info));
1776   PetscCall(PetscFree(mumps->ICNTL_pre));
1777   PetscCall(PetscFree(mumps->CNTL_pre));
1778   PetscCall(MatMumpsResetSchur_Private(mumps));
1779   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1780     mumps->id.job = JOB_END;
1781     PetscMUMPS_c(mumps);
1782     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1783     if (mumps->mumps_comm != MPI_COMM_NULL) {
1784       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1785       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1786     }
1787   }
1788   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1789 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1790   if (mumps->use_petsc_omp_support) {
1791     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1792     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1793     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1794   }
1795 #endif
1796   PetscCall(PetscFree(mumps->ia_alloc));
1797   PetscCall(PetscFree(mumps->ja_alloc));
1798   PetscCall(PetscFree(mumps->recvcount));
1799   PetscCall(PetscFree(mumps->reqs));
1800   PetscCall(PetscFree(mumps->irhs_loc));
1801   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1802   PetscCall(PetscFree(mumps->nest_vals));
1803   PetscCall(PetscFree(A->data));
1804 
1805   /* clear composed functions */
1806   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1807   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1808   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1809   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1810   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1811   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1812   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1813   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1814   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1815   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1816   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1817   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1818   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1819   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1820   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1821   PetscFunctionReturn(PETSC_SUCCESS);
1822 }
1823 
1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1825 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1826 {
1827   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1828   const PetscMPIInt ompsize = mumps->omp_comm_size;
1829   PetscInt          i, m, M, rstart;
1830 
1831   PetscFunctionBegin;
1832   PetscCall(MatGetSize(A, &M, NULL));
1833   PetscCall(MatGetLocalSize(A, &m, NULL));
1834   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1835   if (ompsize == 1) {
1836     if (!mumps->irhs_loc) {
1837       mumps->nloc_rhs = (PetscMUMPSInt)m;
1838       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1839       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1840       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1841     }
1842     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1843   } else {
1844 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1845     const PetscInt *ranges;
1846     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1847     MPI_Group       petsc_group, omp_group;
1848     PetscScalar    *recvbuf = NULL;
1849 
1850     if (mumps->is_omp_master) {
1851       /* Lazily initialize the omp stuff for distributed rhs */
1852       if (!mumps->irhs_loc) {
1853         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1854         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1855         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1856         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1857         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1858         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1859 
1860         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1861         mumps->nloc_rhs = 0;
1862         PetscCall(MatGetOwnershipRanges(A, &ranges));
1863         for (j = 0; j < ompsize; j++) {
1864           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1865           mumps->nloc_rhs += mumps->rhs_nrow[j];
1866         }
1867         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1868         for (j = k = 0; j < ompsize; j++) {
1869           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1870         }
1871 
1872         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1873         PetscCallMPI(MPI_Group_free(&petsc_group));
1874         PetscCallMPI(MPI_Group_free(&omp_group));
1875       }
1876 
1877       /* Realloc buffers when current nrhs is bigger than what we have met */
1878       if (nrhs > mumps->max_nrhs) {
1879         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1880         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1881         mumps->max_nrhs = nrhs;
1882       }
1883 
1884       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1885       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1886       mumps->rhs_disps[0] = 0;
1887       for (j = 1; j < ompsize; j++) {
1888         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1889         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1890       }
1891       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1892     }
1893 
1894     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1895     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1896 
1897     if (mumps->is_omp_master) {
1898       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1899         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1900         for (j = 0; j < ompsize; j++) {
1901           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1902           dst                    = dstbase;
1903           for (i = 0; i < nrhs; i++) {
1904             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1905             src += mumps->rhs_nrow[j];
1906             dst += mumps->nloc_rhs;
1907           }
1908           dstbase += mumps->rhs_nrow[j];
1909         }
1910       }
1911       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1912     }
1913 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1914   }
1915   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1916   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1917   mumps->id.lrhs_loc = mumps->nloc_rhs;
1918   mumps->id.irhs_loc = mumps->irhs_loc;
1919   PetscFunctionReturn(PETSC_SUCCESS);
1920 }
1921 
1922 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1923 {
1924   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1925   const PetscScalar *barray = NULL;
1926   PetscScalar       *array;
1927   IS                 is_iden, is_petsc;
1928   PetscInt           i;
1929   PetscBool          second_solve = PETSC_FALSE;
1930   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1931 
1932   PetscFunctionBegin;
1933   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1934                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1935                                    &cite1));
1936   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1937                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1938                                    &cite2));
1939 
1940   PetscCall(VecFlag(x, A->factorerrortype));
1941   if (A->factorerrortype) {
1942     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1943     PetscFunctionReturn(PETSC_SUCCESS);
1944   }
1945 
1946   mumps->id.nrhs = 1;
1947   if (mumps->petsc_size > 1) {
1948     if (mumps->ICNTL20 == 10) {
1949       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1950       PetscCall(VecGetArrayRead(b, &barray));
1951       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1952     } else {
1953       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
1954       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1955       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1956       if (!mumps->myid) {
1957         PetscCall(VecGetArray(mumps->b_seq, &array));
1958         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959       }
1960     }
1961   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1962     mumps->id.ICNTL(20) = 0;
1963     PetscCall(VecCopy(b, x));
1964     PetscCall(VecGetArray(x, &array));
1965     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1966   }
1967 
1968   /*
1969      handle condensation step of Schur complement (if any)
1970      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1971      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1972      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1973      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1974   */
1975   if (mumps->id.size_schur > 0) {
1976     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1977     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1978       second_solve = PETSC_TRUE;
1979       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1980       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1981     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1982   }
1983 
1984   mumps->id.job = JOB_SOLVE;
1985   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1986   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1987 
1988   /* handle expansion step of Schur complement (if any) */
1989   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1990   else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
1991     PetscCall(MatMumpsSolveSchur_Private(A));
1992     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1993   }
1994 
1995   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1996     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1997       /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1998       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1999     }
2000     if (!mumps->scat_sol) { /* create scatter scat_sol */
2001       PetscInt *isol2_loc = NULL;
2002       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
2003       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
2004       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
2005       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
2006       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2007       PetscCall(ISDestroy(&is_iden));
2008       PetscCall(ISDestroy(&is_petsc));
2009       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2010     }
2011 
2012     PetscScalar *xarray;
2013     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2014     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2015     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2016     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2017     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2018 
2019     if (mumps->ICNTL20 == 10) { // distributed RHS
2020       PetscCall(VecRestoreArrayRead(b, &barray));
2021     } else if (!mumps->myid) { // centralized RHS
2022       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2023     }
2024   } else {
2025     // id.rhs has the solution in mumps precision
2026     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2027     PetscCall(VecRestoreArray(x, &array));
2028   }
2029 
2030   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035 {
2036   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2038 
2039   PetscFunctionBegin;
2040   mumps->id.ICNTL(9) = 0;
2041   PetscCall(MatSolve_MUMPS(A, b, x));
2042   mumps->id.ICNTL(9) = value;
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2047 {
2048   Mat                Bt = NULL;
2049   PetscBool          denseX, denseB, flg, flgT;
2050   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2051   PetscInt           i, nrhs, M, nrhsM;
2052   PetscScalar       *array;
2053   const PetscScalar *barray;
2054   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2055   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2056   PetscScalar       *sol_loc;
2057   void              *sol_loc_save;
2058   PetscCount         sol_loc_len_save;
2059   IS                 is_to, is_from;
2060   PetscInt           k, proc, j, m, myrstart;
2061   const PetscInt    *rstart;
2062   Vec                v_mpi, msol_loc;
2063   VecScatter         scat_sol;
2064   Vec                b_seq;
2065   VecScatter         scat_rhs;
2066   PetscScalar       *aa;
2067   PetscInt           spnr, *ia, *ja;
2068   Mat_MPIAIJ        *b = NULL;
2069 
2070   PetscFunctionBegin;
2071   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2072   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2073 
2074   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2075 
2076   if (denseB) {
2077     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2078     mumps->id.ICNTL(20) = 0; /* dense RHS */
2079   } else {                   /* sparse B */
2080     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2081     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2082     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2083     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2084     /* input B is transpose of actual RHS matrix,
2085      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2086     PetscCall(MatTransposeGetMat(B, &Bt));
2087     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2088   }
2089 
2090   PetscCall(MatGetSize(B, &M, &nrhs));
2091   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2092   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2093   mumps->id.lrhs = (PetscMUMPSInt)M;
2094 
2095   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2096     PetscScalar *aa;
2097     PetscInt     spnr, *ia, *ja;
2098     PetscBool    second_solve = PETSC_FALSE;
2099 
2100     PetscCall(MatDenseGetArray(X, &array));
2101     if (denseB) {
2102       /* copy B to X */
2103       PetscCall(MatDenseGetArrayRead(B, &barray));
2104       PetscCall(PetscArraycpy(array, barray, nrhsM));
2105       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2106     } else { /* sparse B */
2107       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2108       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2109       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2110       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2111       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2112     }
2113     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2114 
2115     /* handle condensation step of Schur complement (if any) */
2116     if (mumps->id.size_schur > 0) {
2117       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2118         second_solve = PETSC_TRUE;
2119         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2120         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
2121       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2122     }
2123 
2124     mumps->id.job = JOB_SOLVE;
2125     PetscMUMPS_c(mumps);
2126     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2127 
2128     /* handle expansion step of Schur complement (if any) */
2129     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2130     else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
2131       PetscCall(MatMumpsSolveSchur_Private(A));
2132       for (j = 0; j < nrhs; ++j)
2133         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2134     }
2135 
2136     if (!denseB) { /* sparse B, restore ia, ja */
2137       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2138       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2139       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2140     }
2141 
2142     // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
2143     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2144     PetscCall(MatDenseRestoreArray(X, &array));
2145     PetscFunctionReturn(PETSC_SUCCESS);
2146   }
2147 
2148   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
2149   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2150 
2151   /* create msol_loc to hold mumps local solution */
2152   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2153   sol_loc_save          = mumps->id.sol_loc;
2154   sol_loc_len_save      = mumps->id.sol_loc_len;
2155   mumps->id.isol_loc    = NULL; // an init state
2156   mumps->id.sol_loc     = NULL;
2157   mumps->id.sol_loc_len = 0;
2158 
2159   lsol_loc = mumps->id.lsol_loc;
2160   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2161   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2162   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2163   mumps->id.isol_loc = isol_loc;
2164 
2165   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2166 
2167   if (denseB) {
2168     if (mumps->ICNTL20 == 10) {
2169       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2170       PetscCall(MatDenseGetArrayRead(B, &barray));
2171       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2172       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2173       PetscCall(MatGetLocalSize(B, &m, NULL));
2174       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2175     } else {
2176       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2177       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
2178         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
2179         0, re-arrange B into desired order, which is a local operation.
2180       */
2181 
2182       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2183       /* wrap dense rhs matrix B into a vector v_mpi */
2184       PetscCall(MatGetLocalSize(B, &m, NULL));
2185       PetscCall(MatDenseGetArrayRead(B, &barray));
2186       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2187       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2188 
2189       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2190       if (!mumps->myid) {
2191         PetscInt *idx;
2192         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2193         PetscCall(PetscMalloc1(nrhsM, &idx));
2194         PetscCall(MatGetOwnershipRanges(B, &rstart));
2195         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2196           for (j = 0; j < nrhs; j++) {
2197             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2198           }
2199         }
2200 
2201         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2202         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2203         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2204       } else {
2205         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2206         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2207         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2208       }
2209 
2210       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2211       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2212       PetscCall(ISDestroy(&is_to));
2213       PetscCall(ISDestroy(&is_from));
2214       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2215 
2216       if (!mumps->myid) { /* define rhs on the host */
2217         PetscCall(VecGetArrayRead(b_seq, &barray));
2218         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2219         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2220       }
2221     }
2222   } else { /* sparse B */
2223     b = (Mat_MPIAIJ *)Bt->data;
2224 
2225     /* wrap dense X into a vector v_mpi */
2226     PetscCall(MatGetLocalSize(X, &m, NULL));
2227     PetscCall(MatDenseGetArrayRead(X, &barray));
2228     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2229     PetscCall(MatDenseRestoreArrayRead(X, &barray));
2230 
2231     if (!mumps->myid) {
2232       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2233       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2234       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2235       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2236       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2237     } else {
2238       mumps->id.irhs_ptr    = NULL;
2239       mumps->id.irhs_sparse = NULL;
2240       mumps->id.nz_rhs      = 0;
2241       if (mumps->id.rhs_sparse_len) {
2242         PetscCall(PetscFree(mumps->id.rhs_sparse));
2243         mumps->id.rhs_sparse_len = 0;
2244       }
2245     }
2246   }
2247 
2248   /* solve phase */
2249   mumps->id.job = JOB_SOLVE;
2250   PetscMUMPS_c(mumps);
2251   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2252 
2253   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2254   PetscCall(MatDenseGetArray(X, &array));
2255   PetscCall(VecPlaceArray(v_mpi, array));
2256 
2257   /* create scatter scat_sol */
2258   PetscCall(MatGetOwnershipRanges(X, &rstart));
2259   /* iidx: index for scatter mumps solution to PETSc X */
2260 
2261   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2262   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2263   for (i = 0; i < lsol_loc; i++) {
2264     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2265 
2266     for (proc = 0; proc < mumps->petsc_size; proc++) {
2267       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2268         myrstart = rstart[proc];
2269         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2270         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2271         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2272         break;
2273       }
2274     }
2275 
2276     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2277   }
2278   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2279   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2280   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2281   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2282   PetscCall(ISDestroy(&is_from));
2283   PetscCall(ISDestroy(&is_to));
2284   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2285   PetscCall(MatDenseRestoreArray(X, &array));
2286 
2287   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2288     mumps->id.sol_loc_len = 0;
2289     PetscCall(PetscFree(mumps->id.sol_loc));
2290   }
2291 
2292   // restore old values
2293   mumps->id.sol_loc     = sol_loc_save;
2294   mumps->id.sol_loc_len = sol_loc_len_save;
2295   mumps->id.isol_loc    = isol_loc_save;
2296 
2297   PetscCall(PetscFree2(sol_loc, isol_loc));
2298   PetscCall(PetscFree(idxx));
2299   PetscCall(VecDestroy(&msol_loc));
2300   PetscCall(VecDestroy(&v_mpi));
2301   if (!denseB) {
2302     if (!mumps->myid) {
2303       b = (Mat_MPIAIJ *)Bt->data;
2304       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2305       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2306       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2307     }
2308   } else {
2309     if (mumps->ICNTL20 == 0) {
2310       PetscCall(VecDestroy(&b_seq));
2311       PetscCall(VecScatterDestroy(&scat_rhs));
2312     }
2313   }
2314   PetscCall(VecScatterDestroy(&scat_sol));
2315   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2316   PetscFunctionReturn(PETSC_SUCCESS);
2317 }
2318 
2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320 {
2321   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2323 
2324   PetscFunctionBegin;
2325   mumps->id.ICNTL(9) = 0;
2326   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327   mumps->id.ICNTL(9) = value;
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332 {
2333   PetscBool flg;
2334   Mat       B;
2335 
2336   PetscFunctionBegin;
2337   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2339 
2340   /* Create B=Bt^T that uses Bt's data structure */
2341   PetscCall(MatCreateTranspose(Bt, &B));
2342 
2343   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344   PetscCall(MatDestroy(&B));
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
2348 #if !defined(PETSC_USE_COMPLEX)
2349 /*
2350   input:
2351    F:        numeric factor
2352   output:
2353    nneg:     total number of negative pivots
2354    nzero:    total number of zero pivots
2355    npos:     (global dimension of F) - nneg - nzero
2356 */
2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358 {
2359   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360   PetscMPIInt size;
2361 
2362   PetscFunctionBegin;
2363   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
2366 
2367   if (nneg) *nneg = mumps->id.INFOG(12);
2368   if (nzero || npos) {
2369     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2370     if (nzero) *nzero = mumps->id.INFOG(28);
2371     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372   }
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 #endif
2376 
2377 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2378 {
2379   PetscMPIInt    nreqs;
2380   PetscMUMPSInt *irn, *jcn;
2381   PetscMPIInt    count;
2382   PetscCount     totnnz, remain;
2383   const PetscInt osize = mumps->omp_comm_size;
2384   PetscScalar   *val;
2385 
2386   PetscFunctionBegin;
2387   if (osize > 1) {
2388     if (reuse == MAT_INITIAL_MATRIX) {
2389       /* master first gathers counts of nonzeros to receive */
2390       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2391       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2392 
2393       /* Then each computes number of send/recvs */
2394       if (mumps->is_omp_master) {
2395         /* Start from 1 since self communication is not done in MPI */
2396         nreqs = 0;
2397         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2398       } else {
2399         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2400       }
2401       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2402 
2403       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
2404          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
2405          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
2406          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2407        */
2408       nreqs = 0; /* counter for actual send/recvs */
2409       if (mumps->is_omp_master) {
2410         totnnz = 0;
2411 
2412         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2413         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2414         PetscCall(PetscMalloc1(totnnz, &val));
2415 
2416         /* Self communication */
2417         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2418         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2419         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2420 
2421         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2422         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2423         PetscCall(PetscFree(mumps->val_alloc));
2424         mumps->nnz = totnnz;
2425         mumps->irn = irn;
2426         mumps->jcn = jcn;
2427         mumps->val = mumps->val_alloc = val;
2428 
2429         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2430         jcn += mumps->recvcount[0];
2431         val += mumps->recvcount[0];
2432 
2433         /* Remote communication */
2434         for (PetscMPIInt i = 1; i < osize; i++) {
2435           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2436           remain = mumps->recvcount[i] - count;
2437           while (count > 0) {
2438             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2439             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2440             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2441             irn += count;
2442             jcn += count;
2443             val += count;
2444             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2445             remain -= count;
2446           }
2447         }
2448       } else {
2449         irn    = mumps->irn;
2450         jcn    = mumps->jcn;
2451         val    = mumps->val;
2452         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2453         remain = mumps->nnz - count;
2454         while (count > 0) {
2455           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2456           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2457           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2458           irn += count;
2459           jcn += count;
2460           val += count;
2461           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2462           remain -= count;
2463         }
2464       }
2465     } else {
2466       nreqs = 0;
2467       if (mumps->is_omp_master) {
2468         val = mumps->val + mumps->recvcount[0];
2469         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2470           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2471           remain = mumps->recvcount[i] - count;
2472           while (count > 0) {
2473             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2474             val += count;
2475             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2476             remain -= count;
2477           }
2478         }
2479       } else {
2480         val    = mumps->val;
2481         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2482         remain = mumps->nnz - count;
2483         while (count > 0) {
2484           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2485           val += count;
2486           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2487           remain -= count;
2488         }
2489       }
2490     }
2491     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2492     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
2493   }
2494   PetscFunctionReturn(PETSC_SUCCESS);
2495 }
2496 
2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498 {
2499   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2500 
2501   PetscFunctionBegin;
2502   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2503     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505     PetscFunctionReturn(PETSC_SUCCESS);
2506   }
2507 
2508   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2510 
2511   /* numerical factorization phase */
2512   mumps->id.job = JOB_FACTNUMERIC;
2513   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515   } else {
2516     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517   }
2518 
2519   if (F->schur) {
2520     const PetscScalar *array;
2521     MUMPS_INT          size = mumps->id.size_schur;
2522     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525   }
2526 
2527   PetscMUMPS_c(mumps);
2528   if (mumps->id.INFOG(1) < 0) {
2529     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2530     if (mumps->id.INFOG(1) == -10) {
2531       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2533     } else if (mumps->id.INFOG(1) == -13) {
2534       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2535       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2536     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2537       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2538       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2539     } else {
2540       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2541       F->factorerrortype = MAT_FACTOR_OTHER;
2542     }
2543   }
2544   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2545 
2546   F->assembled = PETSC_TRUE;
2547 
2548   if (F->schur) { /* reset Schur status to unfactored */
2549 #if defined(PETSC_HAVE_CUDA)
2550     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2551 #endif
2552     PetscScalar *array;
2553     PetscCall(MatDenseGetArray(F->schur, &array));
2554     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2555     PetscCall(MatDenseRestoreArray(F->schur, &array));
2556     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2557       mumps->id.ICNTL(19) = 2;
2558       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2559     }
2560     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2561   }
2562 
2563   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2564   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2565 
2566   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2567   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2568   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2569   // which the solution (in SOL_loc) is available on the local processor.
2570   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2571   // normally have the same contents for each of these calls. The only exception is the case of
2572   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2573   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2574   if (mumps->petsc_size > 1) {
2575     PetscInt     lsol_loc;
2576     PetscScalar *array;
2577 
2578     /* distributed solution; Create x_seq=sol_loc for repeated use */
2579     if (mumps->x_seq) {
2580       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2581       PetscCall(PetscFree(mumps->id.isol_loc));
2582       PetscCall(VecDestroy(&mumps->x_seq));
2583     }
2584     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2585     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2586     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2587     PetscCall(VecGetArray(mumps->x_seq, &array));
2588     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2589     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2590     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2591   }
2592   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2593   PetscFunctionReturn(PETSC_SUCCESS);
2594 }
2595 
2596 /* Sets MUMPS options from the options database */
2597 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2598 {
2599   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2600   PetscReal     cntl;
2601   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2602   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2603   PetscBool     flg   = PETSC_FALSE;
2604   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2605   void         *arr;
2606 
2607   PetscFunctionBegin;
2608   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2609   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2610     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2611     PetscInt       nthreads   = 0;
2612     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2613     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2614     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2615 
2616     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2617     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2618     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2619 
2620     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2621     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2622     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2623     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2624     if (mumps->use_petsc_omp_support) {
2625       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2626 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2627       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2628       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2629 #else
2630       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2631               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2632 #endif
2633     } else {
2634       mumps->omp_comm      = PETSC_COMM_SELF;
2635       mumps->mumps_comm    = mumps->petsc_comm;
2636       mumps->is_omp_master = PETSC_TRUE;
2637     }
2638     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2639     mumps->reqs = NULL;
2640     mumps->tag  = 0;
2641 
2642     if (mumps->mumps_comm != MPI_COMM_NULL) {
2643       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2644         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2645         MPI_Comm comm;
2646         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2647         mumps->mumps_comm = comm;
2648       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2649     }
2650 
2651     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2652     mumps->id.job          = JOB_INIT;
2653     mumps->id.par          = 1; /* host participates factorizaton and solve */
2654     mumps->id.sym          = mumps->sym;
2655 
2656     size          = mumps->id.size_schur;
2657     arr           = mumps->id.schur;
2658     listvar_schur = mumps->id.listvar_schur;
2659     nblk          = mumps->id.nblk;
2660     blkvar        = mumps->id.blkvar;
2661     blkptr        = mumps->id.blkptr;
2662     if (PetscDefined(USE_DEBUG)) {
2663       for (PetscInt i = 0; i < size; i++)
2664         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2665                    A->rmap->N);
2666     }
2667 
2668     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2669     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2670     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
2671     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2672 
2673     PetscMUMPS_c(mumps);
2674     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2675 
2676     /* set PETSc-MUMPS default options - override MUMPS default */
2677     mumps->id.ICNTL(3) = 0;
2678     mumps->id.ICNTL(4) = 0;
2679     if (mumps->petsc_size == 1) {
2680       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2681       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2682     } else {
2683       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2684       mumps->id.ICNTL(21) = 1; /* distributed solution */
2685     }
2686     if (nblk && blkptr) {
2687       mumps->id.ICNTL(15) = 1;
2688       mumps->id.nblk      = nblk;
2689       mumps->id.blkvar    = blkvar;
2690       mumps->id.blkptr    = blkptr;
2691     } else mumps->id.ICNTL(15) = 0;
2692 
2693     /* restore cached ICNTL and CNTL values */
2694     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2695     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2696 
2697     PetscCall(PetscFree(mumps->ICNTL_pre));
2698     PetscCall(PetscFree(mumps->CNTL_pre));
2699 
2700     if (schur) {
2701       mumps->id.size_schur    = size;
2702       mumps->id.schur_lld     = size;
2703       mumps->id.schur         = arr;
2704       mumps->id.listvar_schur = listvar_schur;
2705       if (mumps->petsc_size > 1) {
2706         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2707 
2708         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2709         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2710         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2711         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2712       } else {
2713         if (F->factortype == MAT_FACTOR_LU) {
2714           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2715         } else {
2716           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2717         }
2718       }
2719       mumps->id.ICNTL(26) = -1;
2720     }
2721 
2722     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2723        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2724      */
2725     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2726     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2727 
2728     mumps->scat_rhs = NULL;
2729     mumps->scat_sol = NULL;
2730   }
2731   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2732   if (flg) mumps->id.ICNTL(1) = icntl;
2733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2734   if (flg) mumps->id.ICNTL(2) = icntl;
2735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2736   if (flg) mumps->id.ICNTL(3) = icntl;
2737 
2738   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2739   if (flg) mumps->id.ICNTL(4) = icntl;
2740   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2741 
2742   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2743   if (flg) mumps->id.ICNTL(6) = icntl;
2744 
2745   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2746   if (flg) {
2747     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2748     mumps->id.ICNTL(7) = icntl;
2749   }
2750 
2751   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2752   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2758   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2759   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2760   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2761   if (flg) {
2762     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2763     else if (mumps->id.ICNTL(15) > 0) {
2764       const PetscInt *bsizes;
2765       PetscInt        nblocks, p, *blkptr = NULL;
2766       PetscMPIInt    *recvcounts, *displs, n;
2767       PetscMPIInt     rank, size = 0;
2768 
2769       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2770       flg = PETSC_TRUE;
2771       for (p = 0; p < nblocks; ++p) {
2772         if (bsizes[p] > 1) break;
2773       }
2774       if (p == nblocks) flg = PETSC_FALSE;
2775       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2776       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2777         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2778         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2779         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2780         PetscCall(PetscMPIIntCast(nblocks, &n));
2781         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2782         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2783         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2784         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2785         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2786         if (rank == 0) {
2787           blkptr[0] = 1;
2788           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2789           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2790         }
2791         PetscCall(PetscFree2(recvcounts, displs));
2792         PetscCall(PetscFree(blkptr));
2793       }
2794     }
2795   }
2796   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2797   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2798     PetscCall(MatDestroy(&F->schur));
2799     PetscCall(MatMumpsResetSchur_Private(mumps));
2800   }
2801 
2802   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2803      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2804      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2805      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2806      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2807      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2808    */
2809   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2810 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2811   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2812 #endif
2813   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2814   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2815 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2816   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2817 #endif
2818   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2819 
2820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2821   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2823   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
2824 
2825   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2826   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2828   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2830   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2831   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2832   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2833   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2834   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2835   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2837   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2838   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2840   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2841 
2842   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2843   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2844   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2845   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2846   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2847   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2848   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2849   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2850   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2851   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2852   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2853   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2854 
2855   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2856 
2857   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2858   if (ninfo) {
2859     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2860     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2861     mumps->ninfo = ninfo;
2862     for (i = 0; i < ninfo; i++) {
2863       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2864       mumps->info[i] = info[i];
2865     }
2866   }
2867   PetscOptionsEnd();
2868   PetscFunctionReturn(PETSC_SUCCESS);
2869 }
2870 
2871 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2872 {
2873   PetscFunctionBegin;
2874   if (mumps->id.INFOG(1) < 0) {
2875     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2876     if (mumps->id.INFOG(1) == -6) {
2877       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2878       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2879     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2880       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2881       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2882     } else {
2883       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2884       F->factorerrortype = MAT_FACTOR_OTHER;
2885     }
2886   }
2887   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2888   PetscFunctionReturn(PETSC_SUCCESS);
2889 }
2890 
2891 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2892 {
2893   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2894   Vec            b;
2895   const PetscInt M = A->rmap->N;
2896 
2897   PetscFunctionBegin;
2898   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2899     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2900     PetscFunctionReturn(PETSC_SUCCESS);
2901   }
2902 
2903   /* Set MUMPS options from the options database */
2904   PetscCall(MatSetFromOptions_MUMPS(F, A));
2905 
2906   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2907   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2908 
2909   /* analysis phase */
2910   mumps->id.job = JOB_FACTSYMBOLIC;
2911   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2912   switch (mumps->id.ICNTL(18)) {
2913   case 0: /* centralized assembled matrix input */
2914     if (!mumps->myid) {
2915       mumps->id.nnz = mumps->nnz;
2916       mumps->id.irn = mumps->irn;
2917       mumps->id.jcn = mumps->jcn;
2918       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2919       if (r && mumps->id.ICNTL(7) == 7) {
2920         mumps->id.ICNTL(7) = 1;
2921         if (!mumps->myid) {
2922           const PetscInt *idx;
2923           PetscInt        i;
2924 
2925           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2926           PetscCall(ISGetIndices(r, &idx));
2927           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2928           PetscCall(ISRestoreIndices(r, &idx));
2929         }
2930       }
2931     }
2932     break;
2933   case 3: /* distributed assembled matrix input (size>1) */
2934     mumps->id.nnz_loc = mumps->nnz;
2935     mumps->id.irn_loc = mumps->irn;
2936     mumps->id.jcn_loc = mumps->jcn;
2937     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2938     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2939       PetscCall(MatCreateVecs(A, NULL, &b));
2940       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2941       PetscCall(VecDestroy(&b));
2942     }
2943     break;
2944   }
2945   PetscMUMPS_c(mumps);
2946   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2947 
2948   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2949   F->ops->solve             = MatSolve_MUMPS;
2950   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2951   F->ops->matsolve          = MatMatSolve_MUMPS;
2952   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2953   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2954 
2955   mumps->matstruc = SAME_NONZERO_PATTERN;
2956   PetscFunctionReturn(PETSC_SUCCESS);
2957 }
2958 
2959 /* Note the PETSc r and c permutations are ignored */
2960 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2961 {
2962   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2963   Vec            b;
2964   const PetscInt M = A->rmap->N;
2965 
2966   PetscFunctionBegin;
2967   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2968     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2969     PetscFunctionReturn(PETSC_SUCCESS);
2970   }
2971 
2972   /* Set MUMPS options from the options database */
2973   PetscCall(MatSetFromOptions_MUMPS(F, A));
2974 
2975   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2976   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2977 
2978   /* analysis phase */
2979   mumps->id.job = JOB_FACTSYMBOLIC;
2980   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2981   switch (mumps->id.ICNTL(18)) {
2982   case 0: /* centralized assembled matrix input */
2983     if (!mumps->myid) {
2984       mumps->id.nnz = mumps->nnz;
2985       mumps->id.irn = mumps->irn;
2986       mumps->id.jcn = mumps->jcn;
2987       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2988     }
2989     break;
2990   case 3: /* distributed assembled matrix input (size>1) */
2991     mumps->id.nnz_loc = mumps->nnz;
2992     mumps->id.irn_loc = mumps->irn;
2993     mumps->id.jcn_loc = mumps->jcn;
2994     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2995     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2996       PetscCall(MatCreateVecs(A, NULL, &b));
2997       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2998       PetscCall(VecDestroy(&b));
2999     }
3000     break;
3001   }
3002   PetscMUMPS_c(mumps);
3003   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3004 
3005   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3006   F->ops->solve             = MatSolve_MUMPS;
3007   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3008   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3009 
3010   mumps->matstruc = SAME_NONZERO_PATTERN;
3011   PetscFunctionReturn(PETSC_SUCCESS);
3012 }
3013 
3014 /* Note the PETSc r permutation and factor info are ignored */
3015 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3016 {
3017   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3018   Vec            b;
3019   const PetscInt M = A->rmap->N;
3020 
3021   PetscFunctionBegin;
3022   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3023     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3024     PetscFunctionReturn(PETSC_SUCCESS);
3025   }
3026 
3027   /* Set MUMPS options from the options database */
3028   PetscCall(MatSetFromOptions_MUMPS(F, A));
3029 
3030   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3031   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3032 
3033   /* analysis phase */
3034   mumps->id.job = JOB_FACTSYMBOLIC;
3035   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3036   switch (mumps->id.ICNTL(18)) {
3037   case 0: /* centralized assembled matrix input */
3038     if (!mumps->myid) {
3039       mumps->id.nnz = mumps->nnz;
3040       mumps->id.irn = mumps->irn;
3041       mumps->id.jcn = mumps->jcn;
3042       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3043     }
3044     break;
3045   case 3: /* distributed assembled matrix input (size>1) */
3046     mumps->id.nnz_loc = mumps->nnz;
3047     mumps->id.irn_loc = mumps->irn;
3048     mumps->id.jcn_loc = mumps->jcn;
3049     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3050     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3051       PetscCall(MatCreateVecs(A, NULL, &b));
3052       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3053       PetscCall(VecDestroy(&b));
3054     }
3055     break;
3056   }
3057   PetscMUMPS_c(mumps);
3058   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3059 
3060   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3061   F->ops->solve                 = MatSolve_MUMPS;
3062   F->ops->solvetranspose        = MatSolve_MUMPS;
3063   F->ops->matsolve              = MatMatSolve_MUMPS;
3064   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3065   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3066 #if defined(PETSC_USE_COMPLEX)
3067   F->ops->getinertia = NULL;
3068 #else
3069   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3070 #endif
3071 
3072   mumps->matstruc = SAME_NONZERO_PATTERN;
3073   PetscFunctionReturn(PETSC_SUCCESS);
3074 }
3075 
3076 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3077 {
3078   PetscBool         isascii;
3079   PetscViewerFormat format;
3080   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
3081 
3082   PetscFunctionBegin;
3083   /* check if matrix is mumps type */
3084   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3085 
3086   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3087   if (isascii) {
3088     PetscCall(PetscViewerGetFormat(viewer, &format));
3089     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3090       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3091       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3092         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3093         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3094         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3095         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3096         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3097         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3098         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3099         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3100         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3101         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3102         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3103         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3104         if (mumps->id.ICNTL(11) > 0) {
3105           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3106           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3107           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3108           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3109           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3110           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3111         }
3112         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3113         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3114         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3115         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3116         /* ICNTL(15-17) not used */
3117         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3118         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3119         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3120         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3121         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3122         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3123 
3124         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3125         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3126         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3127         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3128         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3129         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
3130 
3131         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3132         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
3133         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3134         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3135         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3136         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3137         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3138         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3139         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3140         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
3141 
3142         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3143         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3144         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3145         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3146         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3147         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3148 
3149         /* information local to each processor */
3150         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3151         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3152         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3153         PetscCall(PetscViewerFlush(viewer));
3154         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3155         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3156         PetscCall(PetscViewerFlush(viewer));
3157         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3158         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3159         PetscCall(PetscViewerFlush(viewer));
3160 
3161         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
3162         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3163         PetscCall(PetscViewerFlush(viewer));
3164 
3165         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
3166         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3167         PetscCall(PetscViewerFlush(viewer));
3168 
3169         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3170         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3171         PetscCall(PetscViewerFlush(viewer));
3172 
3173         if (mumps->ninfo && mumps->ninfo <= 80) {
3174           PetscInt i;
3175           for (i = 0; i < mumps->ninfo; i++) {
3176             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3177             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3178             PetscCall(PetscViewerFlush(viewer));
3179           }
3180         }
3181         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3182       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3183 
3184       if (mumps->myid == 0) { /* information from the host */
3185         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3186         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3187         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3188         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3189 
3190         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3191         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3192         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3193         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3194         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3195         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3196         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3197         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3198         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3199         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3200         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3201         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
3202         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3203         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3204         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3205         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3206         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3207         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3208         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3209         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3210         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3211         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3212         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3213         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3214         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3215         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3216         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3217         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3218         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3219         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3220         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3221         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3222         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3223         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3224       }
3225     }
3226   }
3227   PetscFunctionReturn(PETSC_SUCCESS);
3228 }
3229 
3230 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3231 {
3232   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3233 
3234   PetscFunctionBegin;
3235   info->block_size        = 1.0;
3236   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3237   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3238   info->nz_unneeded       = 0.0;
3239   info->assemblies        = 0.0;
3240   info->mallocs           = 0.0;
3241   info->memory            = 0.0;
3242   info->fill_ratio_given  = 0;
3243   info->fill_ratio_needed = 0;
3244   info->factor_mallocs    = 0;
3245   PetscFunctionReturn(PETSC_SUCCESS);
3246 }
3247 
3248 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3249 {
3250   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3251   const PetscScalar *arr;
3252   const PetscInt    *idxs;
3253   PetscInt           size, i;
3254 
3255   PetscFunctionBegin;
3256   PetscCall(ISGetLocalSize(is, &size));
3257   /* Schur complement matrix */
3258   PetscCall(MatDestroy(&F->schur));
3259   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3260   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3261   // don't allocate mumps->id.schur[] now as its precision is yet to know
3262   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3263   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3264   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3265   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3266 
3267   /* MUMPS expects Fortran style indices */
3268   PetscCall(PetscFree(mumps->id.listvar_schur));
3269   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3270   PetscCall(ISGetIndices(is, &idxs));
3271   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3272   PetscCall(ISRestoreIndices(is, &idxs));
3273   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
3274   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3275   else mumps->ICNTL26 = -1;
3276   PetscFunctionReturn(PETSC_SUCCESS);
3277 }
3278 
3279 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3280 {
3281   Mat          St;
3282   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3283   PetscScalar *array;
3284   PetscInt     i, j, N = mumps->id.size_schur;
3285 
3286   PetscFunctionBegin;
3287   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3288   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3289   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3290   PetscCall(MatSetType(St, MATDENSE));
3291   PetscCall(MatSetUp(St));
3292   PetscCall(MatDenseGetArray(St, &array));
3293   if (!mumps->sym) {                /* MUMPS always return a full matrix */
3294     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3295       for (i = 0; i < N; i++) {
3296         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3297       }
3298     } else { /* stored by columns */
3299       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3300     }
3301   } else {                          /* either full or lower-triangular (not packed) */
3302     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3303       for (i = 0; i < N; i++) {
3304         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3305       }
3306     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3307       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3308     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3309       for (i = 0; i < N; i++) {
3310         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3311       }
3312     }
3313   }
3314   PetscCall(MatDenseRestoreArray(St, &array));
3315   *S = St;
3316   PetscFunctionReturn(PETSC_SUCCESS);
3317 }
3318 
3319 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3320 {
3321   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3322 
3323   PetscFunctionBegin;
3324   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
3325     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3326     for (i = 0; i < nICNTL_pre; ++i)
3327       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3328     if (i == nICNTL_pre) {                             /* not already cached */
3329       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3330       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3331       mumps->ICNTL_pre[0]++;
3332     }
3333     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3334     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3335   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3336   PetscFunctionReturn(PETSC_SUCCESS);
3337 }
3338 
3339 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3340 {
3341   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3342 
3343   PetscFunctionBegin;
3344   if (mumps->id.job == JOB_NULL) {
3345     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3346     *ival = 0;
3347     for (i = 0; i < nICNTL_pre; ++i) {
3348       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3349     }
3350   } else *ival = mumps->id.ICNTL(icntl);
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 /*@
3355   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3356 
3357   Logically Collective
3358 
3359   Input Parameters:
3360 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3361 . icntl - index of MUMPS parameter array `ICNTL()`
3362 - ival  - value of MUMPS `ICNTL(icntl)`
3363 
3364   Options Database Key:
3365 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3366 
3367   Level: beginner
3368 
3369   Note:
3370   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3371 
3372 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3373 @*/
3374 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3375 {
3376   PetscFunctionBegin;
3377   PetscValidType(F, 1);
3378   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3379   PetscValidLogicalCollectiveInt(F, icntl, 2);
3380   PetscValidLogicalCollectiveInt(F, ival, 3);
3381   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3382   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3383   PetscFunctionReturn(PETSC_SUCCESS);
3384 }
3385 
3386 /*@
3387   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3388 
3389   Logically Collective
3390 
3391   Input Parameters:
3392 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3393 - icntl - index of MUMPS parameter array ICNTL()
3394 
3395   Output Parameter:
3396 . ival - value of MUMPS ICNTL(icntl)
3397 
3398   Level: beginner
3399 
3400 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3401 @*/
3402 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3403 {
3404   PetscFunctionBegin;
3405   PetscValidType(F, 1);
3406   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3407   PetscValidLogicalCollectiveInt(F, icntl, 2);
3408   PetscAssertPointer(ival, 3);
3409   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3410   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3411   PetscFunctionReturn(PETSC_SUCCESS);
3412 }
3413 
3414 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3415 {
3416   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3417 
3418   PetscFunctionBegin;
3419   if (mumps->id.job == JOB_NULL) {
3420     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3421     for (i = 0; i < nCNTL_pre; ++i)
3422       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3423     if (i == nCNTL_pre) {
3424       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3425       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3426       mumps->CNTL_pre[0]++;
3427     }
3428     mumps->CNTL_pre[1 + 2 * i] = icntl;
3429     mumps->CNTL_pre[2 + 2 * i] = val;
3430   } else ID_CNTL_SET(mumps->id, icntl, val);
3431   PetscFunctionReturn(PETSC_SUCCESS);
3432 }
3433 
3434 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3435 {
3436   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3437 
3438   PetscFunctionBegin;
3439   if (mumps->id.job == JOB_NULL) {
3440     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3441     *val = 0.0;
3442     for (i = 0; i < nCNTL_pre; ++i) {
3443       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3444     }
3445   } else *val = ID_CNTL_GET(mumps->id, icntl);
3446   PetscFunctionReturn(PETSC_SUCCESS);
3447 }
3448 
3449 /*@
3450   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3451 
3452   Logically Collective
3453 
3454   Input Parameters:
3455 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3456 . icntl - index of MUMPS parameter array `CNTL()`
3457 - val   - value of MUMPS `CNTL(icntl)`
3458 
3459   Options Database Key:
3460 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
3461 
3462   Level: beginner
3463 
3464   Note:
3465   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3466 
3467 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3468 @*/
3469 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3470 {
3471   PetscFunctionBegin;
3472   PetscValidType(F, 1);
3473   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3474   PetscValidLogicalCollectiveInt(F, icntl, 2);
3475   PetscValidLogicalCollectiveReal(F, val, 3);
3476   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3477   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3478   PetscFunctionReturn(PETSC_SUCCESS);
3479 }
3480 
3481 /*@
3482   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3483 
3484   Logically Collective
3485 
3486   Input Parameters:
3487 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3488 - icntl - index of MUMPS parameter array CNTL()
3489 
3490   Output Parameter:
3491 . val - value of MUMPS CNTL(icntl)
3492 
3493   Level: beginner
3494 
3495 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3496 @*/
3497 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3498 {
3499   PetscFunctionBegin;
3500   PetscValidType(F, 1);
3501   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3502   PetscValidLogicalCollectiveInt(F, icntl, 2);
3503   PetscAssertPointer(val, 3);
3504   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3505   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3506   PetscFunctionReturn(PETSC_SUCCESS);
3507 }
3508 
3509 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3510 {
3511   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3512 
3513   PetscFunctionBegin;
3514   *info = mumps->id.INFO(icntl);
3515   PetscFunctionReturn(PETSC_SUCCESS);
3516 }
3517 
3518 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3519 {
3520   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3521 
3522   PetscFunctionBegin;
3523   *infog = mumps->id.INFOG(icntl);
3524   PetscFunctionReturn(PETSC_SUCCESS);
3525 }
3526 
3527 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3528 {
3529   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3530 
3531   PetscFunctionBegin;
3532   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3533   PetscFunctionReturn(PETSC_SUCCESS);
3534 }
3535 
3536 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3537 {
3538   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3539 
3540   PetscFunctionBegin;
3541   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3542   PetscFunctionReturn(PETSC_SUCCESS);
3543 }
3544 
3545 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3546 {
3547   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3548 
3549   PetscFunctionBegin;
3550   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3551   *size  = 0;
3552   *array = NULL;
3553   if (!mumps->myid) {
3554     *size = mumps->id.INFOG(28);
3555     PetscCall(PetscMalloc1(*size, array));
3556     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3557   }
3558   PetscFunctionReturn(PETSC_SUCCESS);
3559 }
3560 
3561 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3562 {
3563   Mat          Bt = NULL, Btseq = NULL;
3564   PetscBool    flg;
3565   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3566   PetscScalar *aa;
3567   PetscInt     spnr, *ia, *ja, M, nrhs;
3568 
3569   PetscFunctionBegin;
3570   PetscAssertPointer(spRHS, 2);
3571   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3572   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
3573   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3574   PetscCall(MatTransposeGetMat(spRHS, &Bt));
3575 
3576   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3577 
3578   if (mumps->petsc_size > 1) {
3579     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3580     Btseq         = b->A;
3581   } else {
3582     Btseq = Bt;
3583   }
3584 
3585   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3586   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3587   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3588   mumps->id.rhs = NULL;
3589 
3590   if (!mumps->myid) {
3591     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3592     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3593     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3594     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3595     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3596   } else {
3597     mumps->id.irhs_ptr    = NULL;
3598     mumps->id.irhs_sparse = NULL;
3599     mumps->id.nz_rhs      = 0;
3600     if (mumps->id.rhs_sparse_len) {
3601       PetscCall(PetscFree(mumps->id.rhs_sparse));
3602       mumps->id.rhs_sparse_len = 0;
3603     }
3604   }
3605   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3606   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3607 
3608   /* solve phase */
3609   mumps->id.job = JOB_SOLVE;
3610   PetscMUMPS_c(mumps);
3611   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3612 
3613   if (!mumps->myid) {
3614     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3615     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3616     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3617   }
3618   PetscFunctionReturn(PETSC_SUCCESS);
3619 }
3620 
3621 /*@
3622   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3623 
3624   Logically Collective
3625 
3626   Input Parameter:
3627 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3628 
3629   Output Parameter:
3630 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3631 
3632   Level: beginner
3633 
3634 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3635 @*/
3636 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3637 {
3638   PetscFunctionBegin;
3639   PetscValidType(F, 1);
3640   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3641   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3642   PetscFunctionReturn(PETSC_SUCCESS);
3643 }
3644 
3645 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3646 {
3647   Mat spRHS;
3648 
3649   PetscFunctionBegin;
3650   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3651   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3652   PetscCall(MatDestroy(&spRHS));
3653   PetscFunctionReturn(PETSC_SUCCESS);
3654 }
3655 
3656 /*@
3657   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>
3658 
3659   Logically Collective
3660 
3661   Input Parameter:
3662 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3663 
3664   Output Parameter:
3665 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3666 
3667   Level: beginner
3668 
3669 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3670 @*/
3671 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3672 {
3673   PetscBool flg;
3674 
3675   PetscFunctionBegin;
3676   PetscValidType(F, 1);
3677   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3678   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3679   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3680   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3681   PetscFunctionReturn(PETSC_SUCCESS);
3682 }
3683 
3684 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3685 {
3686   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3687 
3688   PetscFunctionBegin;
3689   if (nblk) {
3690     PetscAssertPointer(blkptr, 4);
3691     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3692     PetscCall(PetscFree(mumps->id.blkptr));
3693     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3694     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3695     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3696     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3697     // mumps->id.ICNTL(15) = 1;
3698     if (blkvar) {
3699       PetscCall(PetscFree(mumps->id.blkvar));
3700       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3701       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3702     }
3703   } else {
3704     PetscCall(PetscFree(mumps->id.blkptr));
3705     PetscCall(PetscFree(mumps->id.blkvar));
3706     // mumps->id.ICNTL(15) = 0;
3707     mumps->id.nblk = 0;
3708   }
3709   PetscFunctionReturn(PETSC_SUCCESS);
3710 }
3711 
3712 /*@
3713   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3714 
3715   Not collective, only relevant on the first process of the MPI communicator
3716 
3717   Input Parameters:
3718 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3719 . nblk   - the number of blocks
3720 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
3721 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
3722 
3723   Level: advanced
3724 
3725 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3726 @*/
3727 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3728 {
3729   PetscFunctionBegin;
3730   PetscValidType(F, 1);
3731   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3732   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3733   PetscFunctionReturn(PETSC_SUCCESS);
3734 }
3735 
3736 /*@
3737   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3738 
3739   Logically Collective
3740 
3741   Input Parameters:
3742 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3743 - icntl - index of MUMPS parameter array INFO()
3744 
3745   Output Parameter:
3746 . ival - value of MUMPS INFO(icntl)
3747 
3748   Level: beginner
3749 
3750 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3751 @*/
3752 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3753 {
3754   PetscFunctionBegin;
3755   PetscValidType(F, 1);
3756   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3757   PetscAssertPointer(ival, 3);
3758   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3759   PetscFunctionReturn(PETSC_SUCCESS);
3760 }
3761 
3762 /*@
3763   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3764 
3765   Logically Collective
3766 
3767   Input Parameters:
3768 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3769 - icntl - index of MUMPS parameter array INFOG()
3770 
3771   Output Parameter:
3772 . ival - value of MUMPS INFOG(icntl)
3773 
3774   Level: beginner
3775 
3776 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3777 @*/
3778 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3779 {
3780   PetscFunctionBegin;
3781   PetscValidType(F, 1);
3782   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3783   PetscAssertPointer(ival, 3);
3784   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3785   PetscFunctionReturn(PETSC_SUCCESS);
3786 }
3787 
3788 /*@
3789   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3790 
3791   Logically Collective
3792 
3793   Input Parameters:
3794 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3795 - icntl - index of MUMPS parameter array RINFO()
3796 
3797   Output Parameter:
3798 . val - value of MUMPS RINFO(icntl)
3799 
3800   Level: beginner
3801 
3802 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3803 @*/
3804 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3805 {
3806   PetscFunctionBegin;
3807   PetscValidType(F, 1);
3808   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3809   PetscAssertPointer(val, 3);
3810   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3811   PetscFunctionReturn(PETSC_SUCCESS);
3812 }
3813 
3814 /*@
3815   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3816 
3817   Logically Collective
3818 
3819   Input Parameters:
3820 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3821 - icntl - index of MUMPS parameter array RINFOG()
3822 
3823   Output Parameter:
3824 . val - value of MUMPS RINFOG(icntl)
3825 
3826   Level: beginner
3827 
3828 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3829 @*/
3830 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3831 {
3832   PetscFunctionBegin;
3833   PetscValidType(F, 1);
3834   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3835   PetscAssertPointer(val, 3);
3836   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3837   PetscFunctionReturn(PETSC_SUCCESS);
3838 }
3839 
3840 /*@
3841   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3842 
3843   Logically Collective
3844 
3845   Input Parameter:
3846 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3847 
3848   Output Parameters:
3849 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3850 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3851           for freeing this array.
3852 
3853   Level: beginner
3854 
3855 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3856 @*/
3857 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3858 {
3859   PetscFunctionBegin;
3860   PetscValidType(F, 1);
3861   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3862   PetscAssertPointer(size, 2);
3863   PetscAssertPointer(array, 3);
3864   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3865   PetscFunctionReturn(PETSC_SUCCESS);
3866 }
3867 
3868 /*MC
3869   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3870   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3871 
3872   Works with `MATAIJ` and `MATSBAIJ` matrices
3873 
3874   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3875 
3876   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3877   See details below.
3878 
3879   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3880 
3881   Options Database Keys:
3882 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3883 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3884 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3885 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3886 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3887 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3888                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3889 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3890 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3891 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3892 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3893 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3894 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3895 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3896 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3897 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3898 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3899 .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3900 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3901 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3902 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
3903 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3904 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3905 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3906 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3907 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3908 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3909 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3910 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3911 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3912 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3913 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3914 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3915 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3916 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3917 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3918 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3919 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3920 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3921                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3922 
3923   Level: beginner
3924 
3925   Notes:
3926   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3927   error if the matrix is Hermitian.
3928 
3929   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3930   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3931 
3932   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3933   the failure with
3934 .vb
3935           KSPGetPC(ksp,&pc);
3936           PCFactorGetMatrix(pc,&mat);
3937           MatMumpsGetInfo(mat,....);
3938           MatMumpsGetInfog(mat,....); etc.
3939 .ve
3940   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3941 
3942   MUMPS provides 64-bit integer support in two build modes:
3943   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3944   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3945 
3946   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3947   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3948   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3949   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3950 
3951   With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3952 
3953   Two modes to run MUMPS/PETSc with OpenMP
3954 .vb
3955    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3956    threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test".
3957 .ve
3958 
3959 .vb
3960    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3961    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3962 .ve
3963 
3964    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3965    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3966    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3967    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3968    (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided).
3969 
3970    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3971    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3972    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3973    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3974    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3975    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3976    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3977    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3978    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3979    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3980    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3981    examine the mapping result.
3982 
3983    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3984    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3985    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3986 
3987    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3988 
3989 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3990 M*/
3991 
3992 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3993 {
3994   PetscFunctionBegin;
3995   *type = MATSOLVERMUMPS;
3996   PetscFunctionReturn(PETSC_SUCCESS);
3997 }
3998 
3999 /* MatGetFactor for Seq and MPI AIJ matrices */
4000 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
4001 {
4002   Mat         B;
4003   Mat_MUMPS  *mumps;
4004   PetscBool   isSeqAIJ, isDiag, isDense;
4005   PetscMPIInt size;
4006 
4007   PetscFunctionBegin;
4008 #if defined(PETSC_USE_COMPLEX)
4009   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4010     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4011     *F = NULL;
4012     PetscFunctionReturn(PETSC_SUCCESS);
4013   }
4014 #endif
4015   /* Create the factorization matrix */
4016   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4017   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4018   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4019   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4020   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4021   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4022   PetscCall(MatSetUp(B));
4023 
4024   PetscCall(PetscNew(&mumps));
4025 
4026   B->ops->view    = MatView_MUMPS;
4027   B->ops->getinfo = MatGetInfo_MUMPS;
4028 
4029   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4030   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4031   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4032   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4037   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4038   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4039   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4040   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4041   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4042   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4043   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4044 
4045   if (ftype == MAT_FACTOR_LU) {
4046     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4047     B->factortype            = MAT_FACTOR_LU;
4048     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4049     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4050     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4051     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4052     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4053     mumps->sym = 0;
4054   } else {
4055     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4056     B->factortype                  = MAT_FACTOR_CHOLESKY;
4057     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4058     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4059     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4060     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4061     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4062 #if defined(PETSC_USE_COMPLEX)
4063     mumps->sym = 2;
4064 #else
4065     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4066     else mumps->sym = 2;
4067 #endif
4068   }
4069 
4070   /* set solvertype */
4071   PetscCall(PetscFree(B->solvertype));
4072   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4073   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4074   if (size == 1) {
4075     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4076     B->canuseordering = PETSC_TRUE;
4077   }
4078   B->ops->destroy = MatDestroy_MUMPS;
4079   B->data         = (void *)mumps;
4080 
4081   *F               = B;
4082   mumps->id.job    = JOB_NULL;
4083   mumps->ICNTL_pre = NULL;
4084   mumps->CNTL_pre  = NULL;
4085   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4086   PetscFunctionReturn(PETSC_SUCCESS);
4087 }
4088 
4089 /* MatGetFactor for Seq and MPI SBAIJ matrices */
4090 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4091 {
4092   Mat         B;
4093   Mat_MUMPS  *mumps;
4094   PetscBool   isSeqSBAIJ;
4095   PetscMPIInt size;
4096 
4097   PetscFunctionBegin;
4098 #if defined(PETSC_USE_COMPLEX)
4099   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4100     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4101     *F = NULL;
4102     PetscFunctionReturn(PETSC_SUCCESS);
4103   }
4104 #endif
4105   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4106   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4107   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4108   PetscCall(MatSetUp(B));
4109 
4110   PetscCall(PetscNew(&mumps));
4111   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4112   if (isSeqSBAIJ) {
4113     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4114   } else {
4115     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4116   }
4117 
4118   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4119   B->ops->view                   = MatView_MUMPS;
4120   B->ops->getinfo                = MatGetInfo_MUMPS;
4121 
4122   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4123   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4124   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4134   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4135   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4136   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4137 
4138   B->factortype = MAT_FACTOR_CHOLESKY;
4139 #if defined(PETSC_USE_COMPLEX)
4140   mumps->sym = 2;
4141 #else
4142   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4143   else mumps->sym = 2;
4144 #endif
4145 
4146   /* set solvertype */
4147   PetscCall(PetscFree(B->solvertype));
4148   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4149   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4150   if (size == 1) {
4151     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4152     B->canuseordering = PETSC_TRUE;
4153   }
4154   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4155   B->ops->destroy = MatDestroy_MUMPS;
4156   B->data         = (void *)mumps;
4157 
4158   *F               = B;
4159   mumps->id.job    = JOB_NULL;
4160   mumps->ICNTL_pre = NULL;
4161   mumps->CNTL_pre  = NULL;
4162   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4163   PetscFunctionReturn(PETSC_SUCCESS);
4164 }
4165 
4166 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4167 {
4168   Mat         B;
4169   Mat_MUMPS  *mumps;
4170   PetscBool   isSeqBAIJ;
4171   PetscMPIInt size;
4172 
4173   PetscFunctionBegin;
4174   /* Create the factorization matrix */
4175   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4176   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4177   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4178   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4179   PetscCall(MatSetUp(B));
4180 
4181   PetscCall(PetscNew(&mumps));
4182   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4183   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4184   B->factortype            = MAT_FACTOR_LU;
4185   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4186   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4187   mumps->sym = 0;
4188   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4189 
4190   B->ops->view    = MatView_MUMPS;
4191   B->ops->getinfo = MatGetInfo_MUMPS;
4192 
4193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4205   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4206   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4207   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4208 
4209   /* set solvertype */
4210   PetscCall(PetscFree(B->solvertype));
4211   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4212   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4213   if (size == 1) {
4214     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4215     B->canuseordering = PETSC_TRUE;
4216   }
4217   B->ops->destroy = MatDestroy_MUMPS;
4218   B->data         = (void *)mumps;
4219 
4220   *F               = B;
4221   mumps->id.job    = JOB_NULL;
4222   mumps->ICNTL_pre = NULL;
4223   mumps->CNTL_pre  = NULL;
4224   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4225   PetscFunctionReturn(PETSC_SUCCESS);
4226 }
4227 
4228 /* MatGetFactor for Seq and MPI SELL matrices */
4229 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4230 {
4231   Mat         B;
4232   Mat_MUMPS  *mumps;
4233   PetscBool   isSeqSELL;
4234   PetscMPIInt size;
4235 
4236   PetscFunctionBegin;
4237   /* Create the factorization matrix */
4238   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4239   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4240   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4241   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4242   PetscCall(MatSetUp(B));
4243 
4244   PetscCall(PetscNew(&mumps));
4245 
4246   B->ops->view    = MatView_MUMPS;
4247   B->ops->getinfo = MatGetInfo_MUMPS;
4248 
4249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4251   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4255   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4257   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4258   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4261 
4262   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4263   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4264   B->factortype            = MAT_FACTOR_LU;
4265   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4266   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4267   mumps->sym              = 0;
4268   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4269 
4270   /* set solvertype */
4271   PetscCall(PetscFree(B->solvertype));
4272   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4273   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4274   if (size == 1) {
4275     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4276     B->canuseordering = PETSC_TRUE;
4277   }
4278   B->ops->destroy = MatDestroy_MUMPS;
4279   B->data         = (void *)mumps;
4280 
4281   *F               = B;
4282   mumps->id.job    = JOB_NULL;
4283   mumps->ICNTL_pre = NULL;
4284   mumps->CNTL_pre  = NULL;
4285   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4286   PetscFunctionReturn(PETSC_SUCCESS);
4287 }
4288 
4289 /* MatGetFactor for MATNEST matrices */
4290 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
4291 {
4292   Mat         B, **mats;
4293   Mat_MUMPS  *mumps;
4294   PetscInt    nr, nc;
4295   PetscMPIInt size;
4296   PetscBool   flg = PETSC_TRUE;
4297 
4298   PetscFunctionBegin;
4299 #if defined(PETSC_USE_COMPLEX)
4300   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4301     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4302     *F = NULL;
4303     PetscFunctionReturn(PETSC_SUCCESS);
4304   }
4305 #endif
4306 
4307   /* Return if some condition is not satisfied */
4308   *F = NULL;
4309   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
4310   if (ftype == MAT_FACTOR_CHOLESKY) {
4311     IS       *rows, *cols;
4312     PetscInt *m, *M;
4313 
4314     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
4315     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
4316     PetscCall(MatNestGetISs(A, rows, cols));
4317     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
4318     if (!flg) {
4319       PetscCall(PetscFree2(rows, cols));
4320       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
4321       PetscFunctionReturn(PETSC_SUCCESS);
4322     }
4323     PetscCall(PetscMalloc2(nr, &m, nr, &M));
4324     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
4325     for (PetscInt r = 0; flg && r < nr; r++)
4326       for (PetscInt k = r + 1; flg && k < nr; k++)
4327         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
4328     PetscCall(PetscFree2(m, M));
4329     PetscCall(PetscFree2(rows, cols));
4330     if (!flg) {
4331       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
4332       PetscFunctionReturn(PETSC_SUCCESS);
4333     }
4334   }
4335 
4336   for (PetscInt r = 0; r < nr; r++) {
4337     for (PetscInt c = 0; c < nc; c++) {
4338       Mat       sub = mats[r][c];
4339       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
4340 
4341       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
4342       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
4343       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
4344       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
4345       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
4346       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
4347       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
4348       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
4349       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
4350       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4351       if (ftype == MAT_FACTOR_CHOLESKY) {
4352         if (r == c) {
4353           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
4354             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4355             flg = PETSC_FALSE;
4356           }
4357         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4358           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
4359           flg = PETSC_FALSE;
4360         }
4361       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
4362         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
4363         flg = PETSC_FALSE;
4364       }
4365     }
4366   }
4367   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
4368 
4369   /* Create the factorization matrix */
4370   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4371   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4372   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4373   PetscCall(MatSetUp(B));
4374 
4375   PetscCall(PetscNew(&mumps));
4376 
4377   B->ops->view    = MatView_MUMPS;
4378   B->ops->getinfo = MatGetInfo_MUMPS;
4379 
4380   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4381   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4382   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4383   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4384   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4385   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4386   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4387   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4388   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4389   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4390   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4391   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4392   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4393   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4394   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4395 
4396   if (ftype == MAT_FACTOR_LU) {
4397     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4398     B->factortype            = MAT_FACTOR_LU;
4399     mumps->sym               = 0;
4400   } else {
4401     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4402     B->factortype                  = MAT_FACTOR_CHOLESKY;
4403 #if defined(PETSC_USE_COMPLEX)
4404     mumps->sym = 2;
4405 #else
4406     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4407     else mumps->sym = 2;
4408 #endif
4409   }
4410   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
4411   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
4412 
4413   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4414   if (size == 1) {
4415     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4416     B->canuseordering = PETSC_TRUE;
4417   }
4418 
4419   /* set solvertype */
4420   PetscCall(PetscFree(B->solvertype));
4421   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4422   B->ops->destroy = MatDestroy_MUMPS;
4423   B->data         = (void *)mumps;
4424 
4425   *F               = B;
4426   mumps->id.job    = JOB_NULL;
4427   mumps->ICNTL_pre = NULL;
4428   mumps->CNTL_pre  = NULL;
4429   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4430   PetscFunctionReturn(PETSC_SUCCESS);
4431 }
4432 
4433 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4434 {
4435   PetscFunctionBegin;
4436   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4437   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4438   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4439   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4440   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4441   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4442   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4443   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4444   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4445   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4446   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4447   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4448   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4449   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4450   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4451   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4452   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4453   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4454   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4455   PetscFunctionReturn(PETSC_SUCCESS);
4456 }
4457