/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <cmumps_c.h>
  #else
    #include <zmumps_c.h>
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #include <smumps_c.h>
  #else
    #include <dmumps_c.h>
  #endif
#endif
EXTERN_C_END
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c cmumps_c
  #else
    #define MUMPS_c zmumps_c
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MUMPS_c smumps_c
  #else
    #define MUMPS_c dmumps_c
  #endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

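/* Usage sketch (illustrative only, not part of the interface): narrowing a possibly 64-bit
   count before handing it to MUMPS; out-of-range values fail with PETSC_ERR_ARG_OUTOFRANGE
   in debug builds, e.g.

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast((PetscCount)A->rmap->N, &n)); // fine while N <= 2147483647
     mumps->id.n = n;
*/
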
/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)

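/* Example (a sketch following the -mat_mumps_icntl_* option convention used elsewhere in PETSc):
   reading ICNTL(14) from the options database, with range checking before the narrowing cast,

     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
*/
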
/* If using PETSc OpenMP support, we call MUMPS only on the master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is        \
         specific to each process; we Bcast it mainly for info[1], the error code, since the other        \
         entries do not have an easy translation between omp_comm and petsc_comm. See MUMPS-5.1.2 manual p82. \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

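/* Typical call pattern (a sketch of how this macro is used in this file): set id.job, invoke
   the wrapper, then check INFOG(1) for a negative error code, e.g.

     mumps->id.job = JOB_FACTSYMBOLIC;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
*/
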
/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
    #define MumpsScalar mumps_complex
  #else
    #define MumpsScalar mumps_double_complex
  #endif
#else
  #define MumpsScalar PetscScalar
#endif

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I)  icntl[(I) - 1]
#define CNTL(I)   cntl[(I) - 1]
#define INFOG(I)  infog[(I) - 1]
#define INFO(I)   info[(I) - 1]
#define RINFOG(I) rinfog[(I) - 1]
#define RINFO(I)  rinfo[(I) - 1]

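/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6]: the 1-based ICNTL(7) of the
   MUMPS manual maps onto the 0-based C array. */
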
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
  #else
  ZMUMPS_STRUC_C id;
  #endif
#else
  #if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
  #else
  DMUMPS_STRUC_C id;
  #endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[] and ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller ensuring blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. Uses PetscInt instead of PetscCount since MUMPS only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}

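/* Worked example (a sketch): a 2-row, 1-based CSR as used for the MUMPS sparse RHS,

     nrow = 2;  ia[] = {1, 3, 4};  ja[] = {1, 2, 2};  // row 1 holds cols 1,2; row 2 holds col 2

   gives nnz = ia[2] - 1 = 3. With 64-bit PetscInt the indices are copied (and range-checked)
   into ia_alloc/ja_alloc; otherwise the input arrays are passed through unchanged. */
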
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate the MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */

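/* Worked example (a sketch): with shift = 1 (Fortran-style indices), the 2x2 SeqAIJ matrix
   [2 0; 3 4] is converted to the triples

     r[] = {1, 2, 2};  c[] = {1, 1, 2};  v[] = {2., 3., 4.};  nnz = 3;

   i.e., one (row, col, value) entry per stored nonzero, traversed row by row. */
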
static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

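/* Note on the row recovery above (an explanatory sketch, not extra functionality): SeqSELL
   stores entries slice by slice, where a slice packs 8 consecutive rows column by column.
   Entry j of slice i therefore belongs to local row r = (j - sliidx[i]) & 0x07, i.e., global
   row 8 * i + r, which is exactly what the cycling (r + 1) & 0x07 counter reproduces. */
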
static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscCount      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscCount alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

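/* Worked example (a sketch) of the ajj[j] > i || k >= m filter above: SBAIJ stores whole bs x bs
   blocks, so for a diagonal block (ajj[j] == i) with bs = 2 only the entries with k >= m, i.e.,
   (row m, col k) in {(0,0), (0,1), (1,1)}, are kept, yielding the upper triangle; off-diagonal
   blocks (ajj[j] > i) are emitted in full. */
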
static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscCount         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ upper-triangular extraction below */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa    = (Mat_SeqAIJ *)Ad->data;
  bb    = (Mat_SeqAIJ *)Ao->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v;
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function walks up the parent Mat until the parent is no longer a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself, and returns the appropriate shift, scaling, whether the parent Mat should be conjugated,
// and whether its rows and columns should be swapped
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2];
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}

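/* Worked example (a sketch, assuming the usual MatShell convention that the wrapper applies
   shift * I + scale * op): if sub is a MATHERMITIANTRANSPOSEVIRTUAL wrapping B with shell
   shift s[0] = 3 and scaling s[1] = 2, i.e., sub = 3 I + 2 B^H, then one pass of the loop
   leaves sub = B with conjugate = PETSC_TRUE, swap = PETSC_TRUE, vshift = 3, and vscale = 2
   (starting from vshift = 0, vscale = 1); chains of wrappers accumulate across iterations. */
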
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular blocks for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate the total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle the row and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub    = mats[r][c];
        const PetscInt *ridx   = rows_idx[r];
        const PetscInt *cidx   = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get the column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get the row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the MUMPS data structure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of the row and column indices for transposed blocks
           since we need values with the global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF, but that would have complicated the code a lot.
           Since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64-bit integer builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use the direct map for rows and the mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values into the full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift the new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for the final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

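/* Bookkeeping example (a sketch): for a 2x2 MATNEST handled with LU (no blocks skipped),
   nest_vals_start[] = {0, nnz00, nnz00 + nnz01, nnz00 + nnz01 + nnz10, total} records where
   each block's entries start inside the concatenated irn/jcn/nest_vals arrays, so a
   MAT_REUSE_MATRIX conversion can update block (r, c) in place at val + nest_vals_start[r * nc + c]. */
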
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS, and <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1372   const PetscMPIInt ompsize = mumps->omp_comm_size;
1373   PetscInt          i, m, M, rstart;
1374 
1375   PetscFunctionBegin;
1376   PetscCall(MatGetSize(A, &M, NULL));
1377   PetscCall(MatGetLocalSize(A, &m, NULL));
1378   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1379   if (ompsize == 1) {
1380     if (!mumps->irhs_loc) {
1381       mumps->nloc_rhs = (PetscMUMPSInt)m;
1382       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1383       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1384       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1385     }
1386     mumps->id.rhs_loc = (MumpsScalar *)array;
1387   } else {
1388 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1389     const PetscInt *ranges;
1390     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1391     MPI_Group       petsc_group, omp_group;
1392     PetscScalar    *recvbuf = NULL;
1393 
1394     if (mumps->is_omp_master) {
1395       /* Lazily initialize the omp stuff for distributed rhs */
1396       if (!mumps->irhs_loc) {
1397         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1398         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1399         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1400         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1401         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1402         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1403 
1404         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1405         mumps->nloc_rhs = 0;
1406         PetscCall(MatGetOwnershipRanges(A, &ranges));
1407         for (j = 0; j < ompsize; j++) {
1408           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1409           mumps->nloc_rhs += mumps->rhs_nrow[j];
1410         }
1411         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1412         for (j = k = 0; j < ompsize; j++) {
1413           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1414         }
1415 
1416         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1417         PetscCallMPI(MPI_Group_free(&petsc_group));
1418         PetscCallMPI(MPI_Group_free(&omp_group));
1419       }
1420 
1421       /* Reallocate buffers when the current nrhs is larger than any encountered so far */
1422       if (nrhs > mumps->max_nrhs) {
1423         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1424         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1425         mumps->max_nrhs = nrhs;
1426       }
1427 
1428       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1429       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1430       mumps->rhs_disps[0] = 0;
1431       for (j = 1; j < ompsize; j++) {
1432         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1433         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1434       }
1435       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1436     }
1437 
1438     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1439     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1440 
1441     if (mumps->is_omp_master) {
1442       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1443         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1444         for (j = 0; j < ompsize; j++) {
1445           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1446           dst                    = dstbase;
1447           for (i = 0; i < nrhs; i++) {
1448             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1449             src += mumps->rhs_nrow[j];
1450             dst += mumps->nloc_rhs;
1451           }
1452           dstbase += mumps->rhs_nrow[j];
1453         }
1454       }
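      /* Worked example (hypothetical sizes): ompsize = 2, rhs_nrow = {2,1}, nrhs = 2, so nloc_rhs = 3.
         rhs_recvbuf = {a00,a10,a01,a11, b00,b01}, where each rank's gathered block is column-major with
         its own row count as leading dimension; the loop above interleaves the blocks into
         rhs_loc = {a00,a10,b00, a01,a11,b01}, i.e. column-major with leading dimension nloc_rhs,
         which is the layout MUMPS expects. */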
1455       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1456     }
1457 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1458   }
1459   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1460   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1461   mumps->id.lrhs_loc = mumps->nloc_rhs;
1462   mumps->id.irhs_loc = mumps->irhs_loc;
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
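/* A minimal sketch (hypothetical data, not compiled) of the distributed dense RHS contract filled in
   above: MUMPS wants 1-based global row indices in irhs_loc and the local RHS entries stored
   column-major in rhs_loc with leading dimension lrhs_loc. */
#if 0
/* 5 x 5 system on 2 ranks with columns b0, b1; rank 0 owns global rows 0..2, rank 1 owns rows 3..4 */
/* rank 0: nloc_rhs = 3, irhs_loc = {1,2,3}, rhs_loc = {b0[0],b0[1],b0[2], b1[0],b1[1],b1[2]} */
/* rank 1: nloc_rhs = 2, irhs_loc = {4,5},   rhs_loc = {b0[3],b0[4], b1[3],b1[4]} */
mumps->id.ICNTL(20) = 10;                         /* dense distributed RHS */
PetscCall(MatMumpsSetUpDistRHSInfo(A, 2, array)); /* array points at the contiguous local part of both columns */
#endif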
1466 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1467 {
1468   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1469   const PetscScalar *rarray = NULL;
1470   PetscScalar       *array;
1471   IS                 is_iden, is_petsc;
1472   PetscInt           i;
1473   PetscBool          second_solve = PETSC_FALSE;
1474   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1475 
1476   PetscFunctionBegin;
1477   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1478                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1479                                    &cite1));
1480   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1481                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1482                                    &cite2));
1483 
1484   PetscCall(VecFlag(x, A->factorerrortype));
1485   if (A->factorerrortype) {
1486     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1487     PetscFunctionReturn(PETSC_SUCCESS);
1488   }
1489 
1490   mumps->id.nrhs = 1;
1491   if (mumps->petsc_size > 1) {
1492     if (mumps->ICNTL20 == 10) {
1493       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1494       PetscCall(VecGetArrayRead(b, &rarray));
1495       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1496     } else {
1497       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1498       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1499       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1500       if (!mumps->myid) {
1501         PetscCall(VecGetArray(mumps->b_seq, &array));
1502         mumps->id.rhs = (MumpsScalar *)array;
1503       }
1504     }
1505   } else {                   /* petsc_size == 1 */
1506     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1507     PetscCall(VecCopy(b, x));
1508     PetscCall(VecGetArray(x, &array));
1509     mumps->id.rhs = (MumpsScalar *)array;
1510   }
1511 
1512   /*
1513      handle condensation step of Schur complement (if any)
1514      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1515      According to the MUMPS (5.0.0) manual, any out-of-range value is harmless during the factorization phase
1516      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1517      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1518   */
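  /* A hedged usage sketch (not compiled) of how this code path is typically reached; the matrix,
     vector, and index set names below are hypothetical: */
#if 0
  Mat F;
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  PetscCall(MatFactorSetSchurIS(F, is_schur)); /* makes the interface default to id.ICNTL(26) = -1 */
  PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
  PetscCall(MatLUFactorNumeric(F, A, NULL));
  PetscCall(MatSolve(F, b, x)); /* triggers the condensation/expansion handling below */
#endif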
1519   if (mumps->id.size_schur > 0) {
1520     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1521     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1522       second_solve = PETSC_TRUE;
1523       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1524       mumps->id.ICNTL(26) = 1; /* condensation phase */
1525     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1526   }
1527   /* solve phase */
1528   mumps->id.job = JOB_SOLVE;
1529   PetscMUMPS_c(mumps);
1530   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1531 
1532   /* handle expansion step of Schur complement (if any) */
1533   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1534   else if (mumps->id.ICNTL(26) == 1) {
1535     PetscCall(MatMumpsSolveSchur_Private(A));
1536     for (i = 0; i < mumps->id.size_schur; ++i) {
1537 #if !defined(PETSC_USE_COMPLEX)
1538       PetscScalar val = mumps->id.redrhs[i];
1539 #else
1540       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1541 #endif
1542       array[mumps->id.listvar_schur[i] - 1] = val;
1543     }
1544   }
1545 
1546   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1547     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1548       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1549       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1550     }
1551     if (!mumps->scat_sol) { /* create scatter scat_sol */
1552       PetscInt *isol2_loc = NULL;
1553       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1554       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1555       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1556       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1557       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1558       PetscCall(ISDestroy(&is_iden));
1559       PetscCall(ISDestroy(&is_petsc));
1560       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1561     }
1562 
1563     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1564     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1565   }
1566 
1567   if (mumps->petsc_size > 1) {
1568     if (mumps->ICNTL20 == 10) {
1569       PetscCall(VecRestoreArrayRead(b, &rarray));
1570     } else if (!mumps->myid) {
1571       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1572     }
1573   } else PetscCall(VecRestoreArray(x, &array));
1574 
1575   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); /* a negative INFO(28) encodes its magnitude in millions (MUMPS convention) */
1576   PetscFunctionReturn(PETSC_SUCCESS);
1577 }
1578 
1579 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1580 {
1581   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1582   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1583 
1584   PetscFunctionBegin;
1585   mumps->id.ICNTL(9) = 0; /* any value other than 1 makes MUMPS solve with A^T instead of A */
1586   PetscCall(MatSolve_MUMPS(A, b, x));
1587   mumps->id.ICNTL(9) = value;
1588   PetscFunctionReturn(PETSC_SUCCESS);
1589 }
1590 
1591 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1592 {
1593   Mat                Bt = NULL;
1594   PetscBool          denseX, denseB, flg, flgT;
1595   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1596   PetscInt           i, nrhs, M, nrhsM;
1597   PetscScalar       *array;
1598   const PetscScalar *rbray;
1599   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1600   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1601   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1602   IS                 is_to, is_from;
1603   PetscInt           k, proc, j, m, myrstart;
1604   const PetscInt    *rstart;
1605   Vec                v_mpi, msol_loc;
1606   VecScatter         scat_sol;
1607   Vec                b_seq;
1608   VecScatter         scat_rhs;
1609   PetscScalar       *aa;
1610   PetscInt           spnr, *ia, *ja;
1611   Mat_MPIAIJ        *b = NULL;
1612 
1613   PetscFunctionBegin;
1614   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1615   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1616 
1617   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1618   if (denseB) {
1619     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1620     mumps->id.ICNTL(20) = 0; /* dense RHS */
1621   } else {                   /* sparse B */
1622     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1623     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1624     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1625     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1626     /* input B is transpose of actual RHS matrix,
1627      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1628     PetscCall(MatTransposeGetMat(B, &Bt));
1629     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1630   }
1631 
1632   PetscCall(MatGetSize(B, &M, &nrhs));
1633   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
1634   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
1635   mumps->id.lrhs = (PetscMUMPSInt)M;
1636   mumps->id.rhs  = NULL;
1637 
1638   if (mumps->petsc_size == 1) {
1639     PetscScalar *aa;
1640     PetscInt     spnr, *ia, *ja;
1641     PetscBool    second_solve = PETSC_FALSE;
1642 
1643     PetscCall(MatDenseGetArray(X, &array));
1644     mumps->id.rhs = (MumpsScalar *)array;
1645 
1646     if (denseB) {
1647       /* copy B to X */
1648       PetscCall(MatDenseGetArrayRead(B, &rbray));
1649       PetscCall(PetscArraycpy(array, rbray, nrhsM));
1650       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1651     } else { /* sparse B */
1652       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1653       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1654       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1655       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1656       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1657     }
1658     /* handle condensation step of Schur complement (if any) */
1659     if (mumps->id.size_schur > 0) {
1660       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1661         second_solve = PETSC_TRUE;
1662         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1663         mumps->id.ICNTL(26) = 1; /* condensation phase */
1664       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1665     }
1666     /* solve phase */
1667     mumps->id.job = JOB_SOLVE;
1668     PetscMUMPS_c(mumps);
1669     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1670 
1671     /* handle expansion step of Schur complement (if any) */
1672     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1673     else if (mumps->id.ICNTL(26) == 1) {
1674       PetscCall(MatMumpsSolveSchur_Private(A));
1675       for (j = 0; j < nrhs; ++j)
1676         for (i = 0; i < mumps->id.size_schur; ++i) {
1677 #if !defined(PETSC_USE_COMPLEX)
1678           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1679 #else
1680           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1681 #endif
1682           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1683         }
1684     }
1685     if (!denseB) { /* sparse B */
1686       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1687       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1688       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1689     }
1690     PetscCall(MatDenseRestoreArray(X, &array));
1691     PetscFunctionReturn(PETSC_SUCCESS);
1692   }
1693 
1694   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1695   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1696 
1697   /* create msol_loc to hold mumps local solution */
1698   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1699   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1700 
1701   lsol_loc = mumps->id.lsol_loc;
1702   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
1703   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1704   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1705   mumps->id.isol_loc = isol_loc;
1706 
1707   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1708 
1709   if (denseB) {
1710     if (mumps->ICNTL20 == 10) {
1711       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1712       PetscCall(MatDenseGetArrayRead(B, &rbray));
1713       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1714       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1715       PetscCall(MatGetLocalSize(B, &m, NULL));
1716       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi));
1717     } else {
1718       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1719       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as an MPI_Gather, resulting in
1720         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0, then on rank
1721         0, re-arrange B into the desired order, which is a local operation.
1722       */
1723 
1724       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1725       /* wrap dense rhs matrix B into a vector v_mpi */
1726       PetscCall(MatGetLocalSize(B, &m, NULL));
1727       PetscCall(MatDenseGetArray(B, &bray));
1728       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi));
1729       PetscCall(MatDenseRestoreArray(B, &bray));
1730 
1731       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1732       if (!mumps->myid) {
1733         PetscInt *idx;
1734         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1735         PetscCall(PetscMalloc1(nrhsM, &idx));
1736         PetscCall(MatGetOwnershipRanges(B, &rstart));
1737         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
1738           for (j = 0; j < nrhs; j++) {
1739             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1740           }
1741         }
1742 
1743         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
1744         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
1745         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
1746       } else {
1747         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1748         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1749         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1750       }
1751       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1752       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1753       PetscCall(ISDestroy(&is_to));
1754       PetscCall(ISDestroy(&is_from));
1755       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1756 
1757       if (!mumps->myid) { /* define rhs on the host */
1758         PetscCall(VecGetArray(b_seq, &bray));
1759         mumps->id.rhs = (MumpsScalar *)bray;
1760         PetscCall(VecRestoreArray(b_seq, &bray));
1761       }
1762     }
1763   } else { /* sparse B */
1764     b = (Mat_MPIAIJ *)Bt->data;
1765 
1766     /* wrap dense X into a vector v_mpi */
1767     PetscCall(MatGetLocalSize(X, &m, NULL));
1768     PetscCall(MatDenseGetArray(X, &bray));
1769     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, (const PetscScalar *)bray, &v_mpi));
1770     PetscCall(MatDenseRestoreArray(X, &bray));
1771 
1772     if (!mumps->myid) {
1773       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1774       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1775       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1776       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1777       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1778     } else {
1779       mumps->id.irhs_ptr    = NULL;
1780       mumps->id.irhs_sparse = NULL;
1781       mumps->id.nz_rhs      = 0;
1782       mumps->id.rhs_sparse  = NULL;
1783     }
1784   }
1785 
1786   /* solve phase */
1787   mumps->id.job = JOB_SOLVE;
1788   PetscMUMPS_c(mumps);
1789   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1790 
1791   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
1792   PetscCall(MatDenseGetArray(X, &array));
1793   PetscCall(VecPlaceArray(v_mpi, array));
1794 
1795   /* create scatter scat_sol */
1796   PetscCall(MatGetOwnershipRanges(X, &rstart));
1797   /* iidx: index for scatter mumps solution to PETSc X */
1798 
1799   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1800   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1801   for (i = 0; i < lsol_loc; i++) {
1802     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1803 
1804     for (proc = 0; proc < mumps->petsc_size; proc++) {
1805       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1806         myrstart = rstart[proc];
1807         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
1808         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
1809         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1810         break;
1811       }
1812     }
1813 
1814     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1815   }
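  /* Worked example (hypothetical layout): 2 ranks, rstart = {0, 3, 5}, nrhs = 2. For a local entry with
     isol_loc[i] = 3 (already shifted to 0-based), proc = 1, myrstart = 3, k = 0 and m = 2, hence
     iidx = 0 + 3*2 = 6: rank 1's block starts right after rank 0's 3*2 entries of v_mpi, and column j
     of the solution lands at v_mpi entry 6 + j*2. */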
1816   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1817   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1818   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1819   PetscCall(ISDestroy(&is_from));
1820   PetscCall(ISDestroy(&is_to));
1821   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1822   PetscCall(MatDenseRestoreArray(X, &array));
1823 
1824   /* free spaces */
1825   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1826   mumps->id.isol_loc = isol_loc_save;
1827 
1828   PetscCall(PetscFree2(sol_loc, isol_loc));
1829   PetscCall(PetscFree(idxx));
1830   PetscCall(VecDestroy(&msol_loc));
1831   PetscCall(VecDestroy(&v_mpi));
1832   if (!denseB) {
1833     if (!mumps->myid) {
1834       b = (Mat_MPIAIJ *)Bt->data;
1835       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1836       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1837       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1838     }
1839   } else {
1840     if (mumps->ICNTL20 == 0) {
1841       PetscCall(VecDestroy(&b_seq));
1842       PetscCall(VecScatterDestroy(&scat_rhs));
1843     }
1844   }
1845   PetscCall(VecScatterDestroy(&scat_sol));
1846   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1851 {
1852   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1853   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1854 
1855   PetscFunctionBegin;
1856   mumps->id.ICNTL(9) = 0;
1857   PetscCall(MatMatSolve_MUMPS(A, B, X));
1858   mumps->id.ICNTL(9) = value;
1859   PetscFunctionReturn(PETSC_SUCCESS);
1860 }
1861 
1862 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1863 {
1864   PetscBool flg;
1865   Mat       B;
1866 
1867   PetscFunctionBegin;
1868   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1869   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1870 
1871   /* Create B=Bt^T that uses Bt's data structure */
1872   PetscCall(MatCreateTranspose(Bt, &B));
1873 
1874   PetscCall(MatMatSolve_MUMPS(A, B, X));
1875   PetscCall(MatDestroy(&B));
1876   PetscFunctionReturn(PETSC_SUCCESS);
1877 }
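/* A hedged usage sketch (not compiled): MUMPS wants sparse right-hand sides in compressed *column*
   storage, which is exactly the CSR layout of Bt, so a user supplies the right-hand sides as rows of
   an AIJ matrix; the names below are hypothetical: */
#if 0
Mat F, Bt, X; /* Bt holds the right-hand sides as rows of a (SEQ|MPI)AIJ matrix, X is dense */
PetscCall(MatMatTransposeSolve(F, Bt, X)); /* solves A * X = Bt^T */
#endif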
1878 
1879 #if !defined(PETSC_USE_COMPLEX)
1880 /*
1881   input:
1882    F:        numeric factor
1883   output:
1884    nneg:     total number of negative pivots
1885    nzero:    total number of zero pivots
1886    npos:     (global dimension of F) - nneg - nzero
1887 */
1888 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1889 {
1890   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1891   PetscMPIInt size;
1892 
1893   PetscFunctionBegin;
1894   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1895   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1896   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1897 
1898   if (nneg) *nneg = mumps->id.INFOG(12);
1899   if (nzero || npos) {
1900     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1901     if (nzero) *nzero = mumps->id.INFOG(28);
1902     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1903   }
1904   PetscFunctionReturn(PETSC_SUCCESS);
1905 }
1906 #endif
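/* A hedged usage sketch (not compiled) of querying the inertia computed above; the ICNTL settings
   mirror the checks in MatGetInertia_SBAIJMUMPS, the remaining names are hypothetical: */
#if 0
Mat      F;
PetscInt nneg, nzero, npos;
PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
PetscCall(MatMumpsSetIcntl(F, 13, 1)); /* keep the root node off ScaLAPACK */
PetscCall(MatMumpsSetIcntl(F, 24, 1)); /* enable null pivot row detection */
PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
#endif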
1907 
1908 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1909 {
1910   PetscMPIInt    nreqs;
1911   PetscMUMPSInt *irn, *jcn;
1912   PetscMPIInt    count;
1913   PetscCount     totnnz, remain;
1914   const PetscInt osize = mumps->omp_comm_size;
1915   PetscScalar   *val;
1916 
1917   PetscFunctionBegin;
1918   if (osize > 1) {
1919     if (reuse == MAT_INITIAL_MATRIX) {
1920       /* master first gathers counts of nonzeros to receive */
1921       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1922       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1923 
1924       /* Then each computes number of send/recvs */
1925       if (mumps->is_omp_master) {
1926         /* Start from 1 since self communication is not done in MPI */
1927         nreqs = 0;
1928         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1929       } else {
1930         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
1931       }
1932       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1933 
1934       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1935          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1936          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size is
1937          very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1938        */
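      /* Worked example (hypothetical count): with nnz = 5e9 and PETSC_MPI_INT_MAX = 2^31 - 1, a sender
         posts ceil(5e9 / (2^31 - 1)) = 3 slices per array, i.e. 9 MPI_Isend's in total for irn/jcn/val. */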
1939       nreqs = 0; /* counter for actual send/recvs */
1940       if (mumps->is_omp_master) {
1941         totnnz = 0;
1942 
1943         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1944         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1945         PetscCall(PetscMalloc1(totnnz, &val));
1946 
1947         /* Self communication */
1948         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1949         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1950         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1951 
1952         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1953         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1954         PetscCall(PetscFree(mumps->val_alloc));
1955         mumps->nnz = totnnz;
1956         mumps->irn = irn;
1957         mumps->jcn = jcn;
1958         mumps->val = mumps->val_alloc = val;
1959 
1960         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1961         jcn += mumps->recvcount[0];
1962         val += mumps->recvcount[0];
1963 
1964         /* Remote communication */
1965         for (PetscMPIInt i = 1; i < osize; i++) {
1966           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
1967           remain = mumps->recvcount[i] - count;
1968           while (count > 0) {
1969             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1970             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1971             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1972             irn += count;
1973             jcn += count;
1974             val += count;
1975             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1976             remain -= count;
1977           }
1978         }
1979       } else {
1980         irn    = mumps->irn;
1981         jcn    = mumps->jcn;
1982         val    = mumps->val;
1983         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
1984         remain = mumps->nnz - count;
1985         while (count > 0) {
1986           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1987           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1988           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1989           irn += count;
1990           jcn += count;
1991           val += count;
1992           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1993           remain -= count;
1994         }
1995       }
1996     } else {
1997       nreqs = 0;
1998       if (mumps->is_omp_master) {
1999         val = mumps->val + mumps->recvcount[0];
2000         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2001           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2002           remain = mumps->recvcount[i] - count;
2003           while (count > 0) {
2004             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2005             val += count;
2006             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2007             remain -= count;
2008           }
2009         }
2010       } else {
2011         val    = mumps->val;
2012         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2013         remain = mumps->nnz - count;
2014         while (count > 0) {
2015           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2016           val += count;
2017           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2018           remain -= count;
2019         }
2020       }
2021     }
2022     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2023     mumps->tag++; /* It is totally fine for the above sends/recvs to share one MPI tag */
2024   }
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2029 {
2030   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2031   PetscBool  isMPIAIJ;
2032 
2033   PetscFunctionBegin;
2034   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* tolerate INFOG(1) == -16 with INFO(2) == 0, assumed to flag an empty (N = 0) matrix */
2035     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2036     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2037     PetscFunctionReturn(PETSC_SUCCESS);
2038   }
2039 
2040   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2041   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2042 
2043   /* numerical factorization phase */
2044   mumps->id.job = JOB_FACTNUMERIC;
2045   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2046     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2047   } else {
2048     mumps->id.a_loc = (MumpsScalar *)mumps->val;
2049   }
2050   PetscMUMPS_c(mumps);
2051   if (mumps->id.INFOG(1) < 0) {
2052     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2053     if (mumps->id.INFOG(1) == -10) {
2054       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2055       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2056     } else if (mumps->id.INFOG(1) == -13) {
2057       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2058       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2059     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2060       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2061       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2062     } else {
2063       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2064       F->factorerrortype = MAT_FACTOR_OTHER;
2065     }
2066   }
2067   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2068 
2069   F->assembled = PETSC_TRUE;
2070 
2071   if (F->schur) { /* reset Schur status to unfactored */
2072 #if defined(PETSC_HAVE_CUDA)
2073     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2074 #endif
2075     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2076       mumps->id.ICNTL(19) = 2;
2077       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2078     }
2079     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2080   }
2081 
2082   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
2083   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2084 
2085   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2086   if (mumps->petsc_size > 1) {
2087     PetscInt     lsol_loc;
2088     PetscScalar *sol_loc;
2089 
2090     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2091 
2092     /* distributed solution; Create x_seq=sol_loc for repeated use */
2093     if (mumps->x_seq) {
2094       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2095       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2096       PetscCall(VecDestroy(&mumps->x_seq));
2097     }
2098     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2099     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2100     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2101     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2102     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2103   }
2104   PetscCall(PetscLogFlops((double)mumps->id.RINFO(2)));
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
2108 /* Sets MUMPS options from the options database */
2109 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2110 {
2111   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2112   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2113   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2114   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2115   MumpsScalar  *arr;
2116 
2117   PetscFunctionBegin;
2118   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2119   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2120     PetscInt nthreads   = 0;
2121     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2122     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2123 
2124     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2125     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2126     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2127 
2128     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2129     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2130     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2131     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2132     if (mumps->use_petsc_omp_support) {
2133       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2134 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2135       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2136       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2137 #else
2138       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2139               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2140 #endif
2141     } else {
2142       mumps->omp_comm      = PETSC_COMM_SELF;
2143       mumps->mumps_comm    = mumps->petsc_comm;
2144       mumps->is_omp_master = PETSC_TRUE;
2145     }
2146     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2147     mumps->reqs = NULL;
2148     mumps->tag  = 0;
2149 
2150     if (mumps->mumps_comm != MPI_COMM_NULL) {
2151       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2152         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2153         MPI_Comm comm;
2154         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2155         mumps->mumps_comm = comm;
2156       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2157     }
2158 
2159     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2160     mumps->id.job          = JOB_INIT;
2161     mumps->id.par          = 1; /* host participates in factorization and solve */
2162     mumps->id.sym          = mumps->sym;
2163 
2164     size          = mumps->id.size_schur;
2165     arr           = mumps->id.schur;
2166     listvar_schur = mumps->id.listvar_schur;
2167     PetscMUMPS_c(mumps);
2168     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2169 
2170     /* set PETSc-MUMPS default options - override MUMPS default */
2171     mumps->id.ICNTL(3) = 0;
2172     mumps->id.ICNTL(4) = 0;
2173     if (mumps->petsc_size == 1) {
2174       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2175       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2176     } else {
2177       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2178       mumps->id.ICNTL(21) = 1; /* distributed solution */
2179     }
2180 
2181     /* restore cached ICNTL and CNTL values */
2182     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2183     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2184     PetscCall(PetscFree(mumps->ICNTL_pre));
2185     PetscCall(PetscFree(mumps->CNTL_pre));
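    /* Worked example (hypothetical calls): if MatMumpsSetIcntl(F, 7, 2) and MatMumpsSetIcntl(F, 24, 1)
       were issued before this first JOB_INIT, then ICNTL_pre = {2, 7, 2, 24, 1}: entry 0 is the pair
       count, followed by (index, value) pairs, so the loops above replay ICNTL(7) = 2 and ICNTL(24) = 1. */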
2186 
2187     if (schur) {
2188       mumps->id.size_schur    = size;
2189       mumps->id.schur_lld     = size;
2190       mumps->id.schur         = arr;
2191       mumps->id.listvar_schur = listvar_schur;
2192       if (mumps->petsc_size > 1) {
2193         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2194 
2195         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2196         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2197         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2198         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2199       } else {
2200         if (F->factortype == MAT_FACTOR_LU) {
2201           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2202         } else {
2203           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2204         }
2205       }
2206       mumps->id.ICNTL(26) = -1;
2207     }
2208 
2209     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2210        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2211      */
2212     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2213     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2214 
2215     mumps->scat_rhs = NULL;
2216     mumps->scat_sol = NULL;
2217   }
2218   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2219   if (flg) mumps->id.ICNTL(1) = icntl;
2220   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2221   if (flg) mumps->id.ICNTL(2) = icntl;
2222   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2223   if (flg) mumps->id.ICNTL(3) = icntl;
2224 
2225   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2226   if (flg) mumps->id.ICNTL(4) = icntl;
2227   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2228 
2229   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2230   if (flg) mumps->id.ICNTL(6) = icntl;
2231 
2232   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2233   if (flg) {
2234     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2235     mumps->id.ICNTL(7) = icntl;
2236   }
2237 
2238   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2239   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2240   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2241   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2242   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2243   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2244   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2245   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2246   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2247   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2248   if (flg) {
2249     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2250     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2251   }
2252   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2253   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2254     PetscCall(MatDestroy(&F->schur));
2255     PetscCall(MatMumpsResetSchur_Private(mumps));
2256   }
2257 
2258   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2259      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2260      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2261      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2262      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2263      In short, we could not use distributed RHS with MPICH until MPICH v4.0b1, unless the workaround in MUMPS 5.6.2+ is enabled
2264    */
2265   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2266 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2267   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2268 #endif
2269   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2270   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2271 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2272   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2273 #endif
2274   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2275 
2276   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2277   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2278   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2279   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2280 
2281   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2282   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2283   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2284   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2285   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2286   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2287   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2288   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2289   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2290   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2291   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2292   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2293   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2294   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2295   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2296   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2297 
2298   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2299   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2300   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2301   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2302   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2303   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2304 
2305   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2306 
2307   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2308   if (ninfo) {
2309     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2310     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2311     mumps->ninfo = ninfo;
2312     for (i = 0; i < ninfo; i++) {
2313       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2314       mumps->info[i] = info[i];
2315     }
2316   }
2317   PetscOptionsEnd();
2318   PetscFunctionReturn(PETSC_SUCCESS);
2319 }
2320 
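/* Illustrative example (not part of the original source): the options parsed above come from
   the PETSc options database, so a run that enables null-pivot detection (ICNTL(24)) and
   raises the estimated workspace increase (ICNTL(14)) to 40% could look like

     ./app -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1 -mat_mumps_icntl_14 40

   where ./app is a hypothetical executable that solves a linear system with PETSc. */
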
2321 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2322 {
2323   PetscFunctionBegin;
2324   if (mumps->id.INFOG(1) < 0) {
2325     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2326     if (mumps->id.INFOG(1) == -6) {
2327       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2328       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2329     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2330       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2331       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2332     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
2333       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2334     } else {
2335       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2336       F->factorerrortype = MAT_FACTOR_OTHER;
2337     }
2338   }
2339   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2340   PetscFunctionReturn(PETSC_SUCCESS);
2341 }
2342 
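/* Caller-side sketch (standard PETSc API, not code from this file): when A->erroriffailure is
   PETSC_FALSE the routine above records failures in F->factorerrortype instead of erroring out,
   so after the symbolic phase a caller may query the outcome, e.g.

     MatFactorError err;
     PetscCall(MatLUFactorSymbolic(F, A, rowperm, colperm, &factinfo));
     PetscCall(MatFactorGetError(F, &err));
     if (err == MAT_FACTOR_STRUCT_ZEROPIVOT) PetscCall(PetscPrintf(PETSC_COMM_SELF, "matrix is structurally singular\n"));

   with rowperm, colperm, and factinfo set up as in a usual factorization. */
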
2343 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2344 {
2345   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2346   Vec            b;
2347   const PetscInt M = A->rmap->N;
2348 
2349   PetscFunctionBegin;
2350   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2351     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2352     PetscFunctionReturn(PETSC_SUCCESS);
2353   }
2354 
2355   /* Set MUMPS options from the options database */
2356   PetscCall(MatSetFromOptions_MUMPS(F, A));
2357 
2358   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2359   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2360 
2361   /* analysis phase */
2362   mumps->id.job = JOB_FACTSYMBOLIC;
2363   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2364   switch (mumps->id.ICNTL(18)) {
2365   case 0: /* centralized assembled matrix input */
2366     if (!mumps->myid) {
2367       mumps->id.nnz = mumps->nnz;
2368       mumps->id.irn = mumps->irn;
2369       mumps->id.jcn = mumps->jcn;
2370       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2371       if (r && mumps->id.ICNTL(7) == 7) {
2372         mumps->id.ICNTL(7) = 1;
2373         { /* already on the host (myid == 0) here; this scope just holds the ordering copy */
2374           const PetscInt *idx;
2375           PetscInt        i;
2376 
2377           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2378           PetscCall(ISGetIndices(r, &idx));
2379           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[] starts from 1, not 0! */
2380           PetscCall(ISRestoreIndices(r, &idx));
2381         }
2382       }
2383     }
2384     break;
2385   case 3: /* distributed assembled matrix input (size>1) */
2386     mumps->id.nnz_loc = mumps->nnz;
2387     mumps->id.irn_loc = mumps->irn;
2388     mumps->id.jcn_loc = mumps->jcn;
2389     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2390     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2391       PetscCall(MatCreateVecs(A, NULL, &b));
2392       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2393       PetscCall(VecDestroy(&b));
2394     }
2395     break;
2396   }
2397   PetscMUMPS_c(mumps);
2398   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2399 
2400   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2401   F->ops->solve             = MatSolve_MUMPS;
2402   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2403   F->ops->matsolve          = MatMatSolve_MUMPS;
2404   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2405   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2406 
2407   mumps->matstruc = SAME_NONZERO_PATTERN;
2408   PetscFunctionReturn(PETSC_SUCCESS);
2409 }
2410 
2411 /* Note the PETSc r and c permutations are ignored */
2412 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2413 {
2414   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2415   Vec            b;
2416   const PetscInt M = A->rmap->N;
2417 
2418   PetscFunctionBegin;
2419   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2420     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2421     PetscFunctionReturn(PETSC_SUCCESS);
2422   }
2423 
2424   /* Set MUMPS options from the options database */
2425   PetscCall(MatSetFromOptions_MUMPS(F, A));
2426 
2427   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2428   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2429 
2430   /* analysis phase */
2431   mumps->id.job = JOB_FACTSYMBOLIC;
2432   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2433   switch (mumps->id.ICNTL(18)) {
2434   case 0: /* centralized assembled matrix input */
2435     if (!mumps->myid) {
2436       mumps->id.nnz = mumps->nnz;
2437       mumps->id.irn = mumps->irn;
2438       mumps->id.jcn = mumps->jcn;
2439       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2440     }
2441     break;
2442   case 3: /* distributed assembled matrix input (size>1) */
2443     mumps->id.nnz_loc = mumps->nnz;
2444     mumps->id.irn_loc = mumps->irn;
2445     mumps->id.jcn_loc = mumps->jcn;
2446     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2447     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2448       PetscCall(MatCreateVecs(A, NULL, &b));
2449       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2450       PetscCall(VecDestroy(&b));
2451     }
2452     break;
2453   }
2454   PetscMUMPS_c(mumps);
2455   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2456 
2457   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2458   F->ops->solve             = MatSolve_MUMPS;
2459   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2460   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2461 
2462   mumps->matstruc = SAME_NONZERO_PATTERN;
2463   PetscFunctionReturn(PETSC_SUCCESS);
2464 }
2465 
2466 /* Note the PETSc r permutation and factor info are ignored */
2467 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
2468 {
2469   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2470   Vec            b;
2471   const PetscInt M = A->rmap->N;
2472 
2473   PetscFunctionBegin;
2474   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2475     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2476     PetscFunctionReturn(PETSC_SUCCESS);
2477   }
2478 
2479   /* Set MUMPS options from the options database */
2480   PetscCall(MatSetFromOptions_MUMPS(F, A));
2481 
2482   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2483   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2484 
2485   /* analysis phase */
2486   mumps->id.job = JOB_FACTSYMBOLIC;
2487   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2488   switch (mumps->id.ICNTL(18)) {
2489   case 0: /* centralized assembled matrix input */
2490     if (!mumps->myid) {
2491       mumps->id.nnz = mumps->nnz;
2492       mumps->id.irn = mumps->irn;
2493       mumps->id.jcn = mumps->jcn;
2494       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2495     }
2496     break;
2497   case 3: /* distributed assembled matrix input (size>1) */
2498     mumps->id.nnz_loc = mumps->nnz;
2499     mumps->id.irn_loc = mumps->irn;
2500     mumps->id.jcn_loc = mumps->jcn;
2501     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2502     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2503       PetscCall(MatCreateVecs(A, NULL, &b));
2504       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2505       PetscCall(VecDestroy(&b));
2506     }
2507     break;
2508   }
2509   PetscMUMPS_c(mumps);
2510   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2511 
2512   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2513   F->ops->solve                 = MatSolve_MUMPS;
2514   F->ops->solvetranspose        = MatSolve_MUMPS;
2515   F->ops->matsolve              = MatMatSolve_MUMPS;
2516   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2517   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2518 #if defined(PETSC_USE_COMPLEX)
2519   F->ops->getinertia = NULL;
2520 #else
2521   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2522 #endif
2523 
2524   mumps->matstruc = SAME_NONZERO_PATTERN;
2525   PetscFunctionReturn(PETSC_SUCCESS);
2526 }
2527 
2528 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2529 {
2530   PetscBool         iascii;
2531   PetscViewerFormat format;
2532   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2533 
2534   PetscFunctionBegin;
2535   /* check if matrix is mumps type */
2536   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2537 
2538   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2539   if (iascii) {
2540     PetscCall(PetscViewerGetFormat(viewer, &format));
2541     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2542       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2543       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2551         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2553         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2554         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2555         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2556         if (mumps->id.ICNTL(11) > 0) {
2557           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)mumps->id.RINFOG(4)));
2558           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)mumps->id.RINFOG(5)));
2559           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)mumps->id.RINFOG(6)));
2560           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
2561           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)mumps->id.RINFOG(9)));
2562           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
2563         }
2564         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2565         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2566         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2567         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2568         /* ICNTL(16) and ICNTL(17) not used */
2569         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2570         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2571         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2572         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2573         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2574         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2575 
2576         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2577         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2578         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2579         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2580         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2581         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2582 
2583         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2584         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2585         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2586         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2587         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2588         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
2589         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2590         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
2591         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
2592         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2593 
2594         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)mumps->id.CNTL(1)));
2595         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
2596         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)mumps->id.CNTL(3)));
2597         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)mumps->id.CNTL(4)));
2598         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)mumps->id.CNTL(5)));
2599         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)mumps->id.CNTL(7)));
2600 
2601         /* information local to each processor */
2602         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2603         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2604         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
2605         PetscCall(PetscViewerFlush(viewer));
2606         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2607         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
2608         PetscCall(PetscViewerFlush(viewer));
2609         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2610         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
2611         PetscCall(PetscViewerFlush(viewer));
2612 
2613         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2614         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2615         PetscCall(PetscViewerFlush(viewer));
2616 
2617         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2618         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2619         PetscCall(PetscViewerFlush(viewer));
2620 
2621         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2622         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2623         PetscCall(PetscViewerFlush(viewer));
2624 
2625         if (mumps->ninfo && mumps->ninfo <= 80) {
2626           PetscInt i;
2627           for (i = 0; i < mumps->ninfo; i++) {
2628             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2629             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2630             PetscCall(PetscViewerFlush(viewer));
2631           }
2632         }
2633         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2634       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2635 
2636       if (mumps->myid == 0) { /* information from the host */
2637         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
2638         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
2639         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
2640         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2641 
2642         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2643         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2644         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2645         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2646         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2647         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2648         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2649         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2650         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2651         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2652         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2653         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2654         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2655         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2656         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2657         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2658         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2659         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2660         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2661         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2662         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2663         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2664         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2665         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2666         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2667         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2668         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2669         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2670         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2671         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2672         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2673         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2674         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2675         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2676       }
2677     }
2678   }
2679   PetscFunctionReturn(PETSC_SUCCESS);
2680 }
2681 
2682 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
2683 {
2684   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2685 
2686   PetscFunctionBegin;
2687   info->block_size        = 1.0;
2688   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); /* MUMPS convention: a negative INFOG(20) carries the entry count in millions */
2689   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2690   info->nz_unneeded       = 0.0;
2691   info->assemblies        = 0.0;
2692   info->mallocs           = 0.0;
2693   info->memory            = 0.0;
2694   info->fill_ratio_given  = 0;
2695   info->fill_ratio_needed = 0;
2696   info->factor_mallocs    = 0;
2697   PetscFunctionReturn(PETSC_SUCCESS);
2698 }
2699 
2700 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2701 {
2702   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2703   const PetscScalar *arr;
2704   const PetscInt    *idxs;
2705   PetscInt           size, i;
2706 
2707   PetscFunctionBegin;
2708   PetscCall(ISGetLocalSize(is, &size));
2709   /* Schur complement matrix */
2710   PetscCall(MatDestroy(&F->schur));
2711   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2712   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2713   mumps->id.schur = (MumpsScalar *)arr;
2714   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
2715   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
2716   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2717   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2718 
2719   /* MUMPS expects Fortran style indices */
2720   PetscCall(PetscFree(mumps->id.listvar_schur));
2721   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2722   PetscCall(ISGetIndices(is, &idxs));
2723   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
2724   PetscCall(ISRestoreIndices(is, &idxs));
2725   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2726   mumps->id.ICNTL(26) = -1;
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729 
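/* Usage sketch (hedged; all calls are standard PETSc API and the variable names are
   illustrative): select a Schur complement on an index set "is" before the factorization,
   then retrieve it as a dense matrix afterwards:

     Mat                  F, S;
     MatFactorSchurStatus status;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));
     PetscCall(MatLUFactorSymbolic(F, A, isrow, iscol, &factinfo));
     PetscCall(MatLUFactorNumeric(F, A, &factinfo));
     PetscCall(MatFactorCreateSchurComplement(F, &S, &status));
*/
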
2730 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2731 {
2732   Mat          St;
2733   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2734   PetscScalar *array;
2735 
2736   PetscFunctionBegin;
2737   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2738   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2739   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2740   PetscCall(MatSetType(St, MATDENSE));
2741   PetscCall(MatSetUp(St));
2742   PetscCall(MatDenseGetArray(St, &array));
2743   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2744     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2745       PetscInt i, j, N = mumps->id.size_schur;
2746       for (i = 0; i < N; i++) {
2747         for (j = 0; j < N; j++) {
2748 #if !defined(PETSC_USE_COMPLEX)
2749           PetscScalar val = mumps->id.schur[i * N + j];
2750 #else
2751           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2752 #endif
2753           array[j * N + i] = val;
2754         }
2755       }
2756     } else { /* stored by columns */
2757       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2758     }
2759   } else {                          /* either full or lower-triangular (not packed) */
2760     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2761       PetscInt i, j, N = mumps->id.size_schur;
2762       for (i = 0; i < N; i++) {
2763         for (j = i; j < N; j++) {
2764 #if !defined(PETSC_USE_COMPLEX)
2765           PetscScalar val = mumps->id.schur[i * N + j];
2766 #else
2767           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2768 #endif
2769           array[i * N + j] = array[j * N + i] = val;
2770         }
2771       }
2772     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2773       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2774     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2775       PetscInt i, j, N = mumps->id.size_schur;
2776       for (i = 0; i < N; i++) {
2777         for (j = 0; j < i + 1; j++) {
2778 #if !defined(PETSC_USE_COMPLEX)
2779           PetscScalar val = mumps->id.schur[i * N + j];
2780 #else
2781           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2782 #endif
2783           array[i * N + j] = array[j * N + i] = val;
2784         }
2785       }
2786     }
2787   }
2788   PetscCall(MatDenseRestoreArray(St, &array));
2789   *S = St;
2790   PetscFunctionReturn(PETSC_SUCCESS);
2791 }
2792 
2793 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2794 {
2795   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2796 
2797   PetscFunctionBegin;
2798   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
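    /* ICNTL_pre layout: ICNTL_pre[0] holds the number n of cached pairs, followed by the n (icntl, ival) pairs stored at ICNTL_pre[1 + 2*i] and ICNTL_pre[2 + 2*i] */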
2799     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2800     for (i = 0; i < nICNTL_pre; ++i)
2801       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2802     if (i == nICNTL_pre) {                             /* not already cached */
2803       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2804       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2805       mumps->ICNTL_pre[0]++;
2806     }
2807     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
2808     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2809   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2810   PetscFunctionReturn(PETSC_SUCCESS);
2811 }
2812 
2813 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2814 {
2815   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2816 
2817   PetscFunctionBegin;
2818   if (mumps->id.job == JOB_NULL) {
2819     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2820     *ival = 0;
2821     for (i = 0; i < nICNTL_pre; ++i) {
2822       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2823     }
2824   } else *ival = mumps->id.ICNTL(icntl);
2825   PetscFunctionReturn(PETSC_SUCCESS);
2826 }
2827 
2828 /*@
2829   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2830 
2831   Logically Collective
2832 
2833   Input Parameters:
2834 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2835 . icntl - index of MUMPS parameter array `ICNTL()`
2836 - ival  - value of MUMPS `ICNTL(icntl)`
2837 
2838   Options Database Key:
2839 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
2840 
2841   Level: beginner
2842 
2843   Note:
2844   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
2845 
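  Example Usage, a minimal sketch assuming `F` was obtained from `MatGetFactor()` as above:
.vb
  PetscCall(MatMumpsSetIcntl(F, 14, 30)); // raise ICNTL(14), the percentage increase in estimated workspace, to 30
.ve
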
2846 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2847 @*/
2848 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2849 {
2850   PetscFunctionBegin;
2851   PetscValidType(F, 1);
2852   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2853   PetscValidLogicalCollectiveInt(F, icntl, 2);
2854   PetscValidLogicalCollectiveInt(F, ival, 3);
2855   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2856   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2857   PetscFunctionReturn(PETSC_SUCCESS);
2858 }
2859 
2860 /*@
2861   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2862 
2863   Logically Collective
2864 
2865   Input Parameters:
2866 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2867 - icntl - index of MUMPS parameter array ICNTL()
2868 
2869   Output Parameter:
2870 . ival - value of MUMPS ICNTL(icntl)
2871 
2872   Level: beginner
2873 
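  Example Usage, a minimal sketch assuming `F` was obtained from `MatGetFactor()` as above:
.vb
  PetscInt ordering;
  PetscCall(MatMumpsGetIcntl(F, 7, &ordering)); // query ICNTL(7), the sequential ordering choice
.ve
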
2874 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2875 @*/
2876 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2877 {
2878   PetscFunctionBegin;
2879   PetscValidType(F, 1);
2880   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2881   PetscValidLogicalCollectiveInt(F, icntl, 2);
2882   PetscAssertPointer(ival, 3);
2883   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2884   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2889 {
2890   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2891 
2892   PetscFunctionBegin;
2893   if (mumps->id.job == JOB_NULL) {
2894     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2895     for (i = 0; i < nCNTL_pre; ++i)
2896       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2897     if (i == nCNTL_pre) {
2898       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2899       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2900       mumps->CNTL_pre[0]++;
2901     }
2902     mumps->CNTL_pre[1 + 2 * i] = icntl;
2903     mumps->CNTL_pre[2 + 2 * i] = val;
2904   } else mumps->id.CNTL(icntl) = val;
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
2907 
2908 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2909 {
2910   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2911 
2912   PetscFunctionBegin;
2913   if (mumps->id.job == JOB_NULL) {
2914     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2915     *val = 0.0;
2916     for (i = 0; i < nCNTL_pre; ++i) {
2917       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2918     }
2919   } else *val = mumps->id.CNTL(icntl);
2920   PetscFunctionReturn(PETSC_SUCCESS);
2921 }
2922 
2923 /*@
2924   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2925 
2926   Logically Collective
2927 
2928   Input Parameters:
2929 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2930 . icntl - index of MUMPS parameter array `CNTL()`
2931 - val   - value of MUMPS `CNTL(icntl)`
2932 
2933   Options Database Key:
2934 . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`
2935 
2936   Level: beginner
2937 
2938   Note:
2939   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
2940 
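  Example Usage, a minimal sketch assuming `F` was obtained from `MatGetFactor()` as above:
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.05)); // set CNTL(1), the relative pivoting threshold
.ve
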
2941 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2942 @*/
2943 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2944 {
2945   PetscFunctionBegin;
2946   PetscValidType(F, 1);
2947   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2948   PetscValidLogicalCollectiveInt(F, icntl, 2);
2949   PetscValidLogicalCollectiveReal(F, val, 3);
2950   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2951   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 /*@
2956   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2957 
2958   Logically Collective
2959 
2960   Input Parameters:
2961 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
2962 - icntl - index of MUMPS parameter array CNTL()
2963 
2964   Output Parameter:
2965 . val - value of MUMPS CNTL(icntl)
2966 
2967   Level: beginner
2968 
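  Example Usage, a minimal sketch assuming `F` was obtained from `MatGetFactor()` as above:
.vb
  PetscReal pivtol;
  PetscCall(MatMumpsGetCntl(F, 1, &pivtol)); // read back CNTL(1), the relative pivoting threshold
.ve
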
2969 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2970 @*/
2971 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2972 {
2973   PetscFunctionBegin;
2974   PetscValidType(F, 1);
2975   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2976   PetscValidLogicalCollectiveInt(F, icntl, 2);
2977   PetscAssertPointer(val, 3);
2978   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2979   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2980   PetscFunctionReturn(PETSC_SUCCESS);
2981 }
2982 
2983 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2984 {
2985   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2986 
2987   PetscFunctionBegin;
2988   *info = mumps->id.INFO(icntl);
2989   PetscFunctionReturn(PETSC_SUCCESS);
2990 }
2991 
2992 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2993 {
2994   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2995 
2996   PetscFunctionBegin;
2997   *infog = mumps->id.INFOG(icntl);
2998   PetscFunctionReturn(PETSC_SUCCESS);
2999 }
3000 
3001 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3002 {
3003   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3004 
3005   PetscFunctionBegin;
3006   *rinfo = mumps->id.RINFO(icntl);
3007   PetscFunctionReturn(PETSC_SUCCESS);
3008 }
3009 
3010 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3011 {
3012   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3013 
3014   PetscFunctionBegin;
3015   *rinfog = mumps->id.RINFOG(icntl);
3016   PetscFunctionReturn(PETSC_SUCCESS);
3017 }
3018 
3019 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3020 {
3021   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3022 
3023   PetscFunctionBegin;
3024   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3025   *size  = 0;
3026   *array = NULL;
3027   if (!mumps->myid) {
3028     *size = mumps->id.INFOG(28);
3029     PetscCall(PetscMalloc1(*size, array));
3030     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3031   }
3032   PetscFunctionReturn(PETSC_SUCCESS);
3033 }
3034 
3035 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3036 {
3037   Mat          Bt = NULL, Btseq = NULL;
3038   PetscBool    flg;
3039   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3040   PetscScalar *aa;
3041   PetscInt     spnr, *ia, *ja, M, nrhs;
3042 
3043   PetscFunctionBegin;
3044   PetscAssertPointer(spRHS, 2);
3045   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3046   PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3047   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3048   PetscCall(MatTransposeGetMat(spRHS, &Bt));
3049 
3050   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3051 
3052   if (mumps->petsc_size > 1) {
3053     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3054     Btseq         = b->A;
3055   } else {
3056     Btseq = Bt;
3057   }
3058 
3059   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3060   PetscCall(PetscMUMPSIntCast(nrhs, &mumps->id.nrhs));
3061   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3062   mumps->id.rhs = NULL;
3063 
3064   if (!mumps->myid) {
3065     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3066     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3067     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3068     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3069     mumps->id.rhs_sparse = (MumpsScalar *)aa;
3070   } else {
3071     mumps->id.irhs_ptr    = NULL;
3072     mumps->id.irhs_sparse = NULL;
3073     mumps->id.nz_rhs      = 0;
3074     mumps->id.rhs_sparse  = NULL;
3075   }
3076   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3077   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3078 
3079   /* solve phase */
3080   mumps->id.job = JOB_SOLVE;
3081   PetscMUMPS_c(mumps);
3082   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3083 
3084   if (!mumps->myid) {
3085     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3086     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3087     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3088   }
3089   PetscFunctionReturn(PETSC_SUCCESS);
3090 }
3091 
3092 /*@
3093   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3094 
3095   Logically Collective
3096 
3097   Input Parameter:
3098 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3099 
3100   Output Parameter:
3101 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the requested entries of the inverse of `A`
3102 
3103   Level: beginner
3104 
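  Example Usage, a hedged sketch in which `Bt` is a user-created sequential `MATAIJ` matrix whose
  nonzero pattern marks the wanted entries of the inverse (transposed):
.vb
  Mat spRHS;
  PetscCall(MatCreateTranspose(Bt, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS)); // the requested entries are written into Bt
  PetscCall(MatDestroy(&spRHS));
.ve
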
3105 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3106 @*/
3107 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3108 {
3109   PetscFunctionBegin;
3110   PetscValidType(F, 1);
3111   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3112   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3113   PetscFunctionReturn(PETSC_SUCCESS);
3114 }
3115 
3116 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3117 {
3118   Mat spRHS;
3119 
3120   PetscFunctionBegin;
3121   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3122   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3123   PetscCall(MatDestroy(&spRHS));
3124   PetscFunctionReturn(PETSC_SUCCESS);
3125 }
3126 
3127 /*@
3128   MatMumpsGetInverseTranspose - Get user-specified set of entries in the inverse of matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3129 
3130   Logically Collective
3131 
3132   Input Parameter:
3133 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3134 
3135   Output Parameter:
3136 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$
3137 
3138   Level: beginner
3139 
3140 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3141 @*/
3142 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3143 {
3144   PetscBool flg;
3145 
3146   PetscFunctionBegin;
3147   PetscValidType(F, 1);
3148   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3149   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3150   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix");
3151   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3152   PetscFunctionReturn(PETSC_SUCCESS);
3153 }
3154 
3155 /*@
3156   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3157 
3158   Logically Collective
3159 
3160   Input Parameters:
3161 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3162 - icntl - index of MUMPS parameter array INFO()
3163 
3164   Output Parameter:
3165 . ival - value of MUMPS INFO(icntl)
3166 
3167   Level: beginner
3168 
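  Example Usage, a minimal sketch assuming `F` has been factored:
.vb
  PetscInt mem;
  PetscCall(MatMumpsGetInfo(F, 16, &mem)); // INFO(16): size (in MB) of MUMPS data used locally during factorization
.ve
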
3169 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3170 @*/
3171 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3172 {
3173   PetscFunctionBegin;
3174   PetscValidType(F, 1);
3175   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3176   PetscAssertPointer(ival, 3);
3177   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3178   PetscFunctionReturn(PETSC_SUCCESS);
3179 }
3180 
3181 /*@
3182   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3183 
3184   Logically Collective
3185 
3186   Input Parameters:
3187 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3188 - icntl - index of MUMPS parameter array INFOG()
3189 
3190   Output Parameter:
3191 . ival - value of MUMPS INFOG(icntl)
3192 
3193   Level: beginner
3194 
3195 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
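  Example Usage, a minimal sketch assuming `F` has been factored:
.vb
  PetscInt nnull;
  PetscCall(MatMumpsGetInfog(F, 28, &nnull)); // INFOG(28): number of null pivots encountered
.ve
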
3196 @*/
3197 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3198 {
3199   PetscFunctionBegin;
3200   PetscValidType(F, 1);
3201   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3202   PetscAssertPointer(ival, 3);
3203   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3204   PetscFunctionReturn(PETSC_SUCCESS);
3205 }
3206 
3207 /*@
3208   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3209 
3210   Logically Collective
3211 
3212   Input Parameters:
3213 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3214 - icntl - index of MUMPS parameter array RINFO()
3215 
3216   Output Parameter:
3217 . val - value of MUMPS RINFO(icntl)
3218 
3219   Level: beginner
3220 
3221 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3222 @*/
3223 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3224 {
3225   PetscFunctionBegin;
3226   PetscValidType(F, 1);
3227   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3228   PetscAssertPointer(val, 3);
3229   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3230   PetscFunctionReturn(PETSC_SUCCESS);
3231 }
3232 
3233 /*@
3234   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3235 
3236   Logically Collective
3237 
3238   Input Parameters:
3239 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3240 - icntl - index of MUMPS parameter array RINFOG()
3241 
3242   Output Parameter:
3243 . val - value of MUMPS RINFOG(icntl)
3244 
3245   Level: beginner
3246 
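  Example Usage, a minimal sketch assuming the determinant was requested with `-mat_mumps_icntl_33 1`
  and `F` has been factored; the determinant is (RINFOG(12),RINFOG(13))*2^INFOG(34):
.vb
  PetscReal mantissa;
  PetscCall(MatMumpsGetRinfog(F, 12, &mantissa)); // real part of the determinant mantissa
.ve
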
3247 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3248 @*/
3249 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3250 {
3251   PetscFunctionBegin;
3252   PetscValidType(F, 1);
3253   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3254   PetscAssertPointer(val, 3);
3255   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3256   PetscFunctionReturn(PETSC_SUCCESS);
3257 }
3258 
3259 /*@
3260   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3261 
3262   Logically Collective
3263 
3264   Input Parameter:
3265 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3266 
3267   Output Parameters:
3268 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3269 - array - array of rows with null pivots; the rows use 0-based indexing. The array is allocated within the function and the user is responsible
3270           for freeing it with `PetscFree()`.
3271 
3272   Level: beginner
3273 
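  Example Usage, a minimal sketch (requires `-mat_mumps_icntl_24 1`; the array is non-empty on MPI rank 0 only):
.vb
  PetscInt nnull, *rows;
  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows)); // rows[] holds the 0-based null-pivot rows
  PetscCall(PetscFree(rows));
.ve
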
3274 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3275 @*/
3276 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3277 {
3278   PetscFunctionBegin;
3279   PetscValidType(F, 1);
3280   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3281   PetscAssertPointer(size, 2);
3282   PetscAssertPointer(array, 3);
3283   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3284   PetscFunctionReturn(PETSC_SUCCESS);
3285 }
3286 
3287 /*MC
3288   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3289   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3290 
3291   Works with `MATAIJ`, `MATSBAIJ`, `MATBAIJ` (LU only), `MATSEQSELL` (LU only), `MATDENSE`, `MATDIAGONAL`, and `MATNEST` matrices (see the registrations in `MatSolverTypeRegister_MUMPS()` below)
3292 
3293   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3294 
3295   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of your code runs in flat-MPI mode.
3296   See details below.
3297 
3298   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
3299 
3300   Options Database Keys:
3301 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3302 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3303 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3304 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3305 .  -mat_mumps_icntl_6  - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
3306 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3307                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3308 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3309 .  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3310 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3311 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3312 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3313 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3314 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3315 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3316 .  -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense right-hand side
3317 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3318 .  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
3319 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3320 .  -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
3321 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix is involved
3322 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3323 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3324 .  -mat_mumps_icntl_30 - ICNTL(30): computes a user-specified set of entries in inv(A)
3325 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3326 .  -mat_mumps_icntl_33 - ICNTL(33): computes the determinant
3327 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3328 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3329 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3330 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3331 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3332 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3333 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3334 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3335 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3336 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3337 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3338 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3339 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3340                                     The default is typically the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
3341 
3342   Level: beginner
3343 
3344   Notes:
3345   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it
3346   on a Hermitian matrix will error.
3347 
3348   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3349   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
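
  For example, from the command line (with a hypothetical executable ./ex), one can select MUMPS LU and enable null pivot detection with
.vb
  ./ex -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_24 1
.ve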
3350 
3351   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3352   the failure with
3353 .vb
3354           KSPGetPC(ksp,&pc);
3355           PCFactorGetMatrix(pc,&mat);
3356           MatMumpsGetInfo(mat,....);
3357           MatMumpsGetInfog(mat,....); etc.
3358 .ve
3359   Alternatively, run with `-ksp_error_if_not_converged` and the program will stop with the MUMPS information printed in the error message.
3360 
3361   MUMPS provides 64-bit integer support in two build modes:
3362   full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and a Fortran compiler option such as -i8 or -fdefault-integer-8, and
3363   requires that all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) be built the same way with 64-bit integers (for example, ILP64 Intel MKL and MPI).
3364 
3365   selective 64-bit: with the default MUMPS build, 64-bit integers are used only where needed. In the compressed sparse row (CSR) storage of matrices,
3366   MUMPS stores column indices in 32 bits but row offsets in 64 bits, so you can have a huge number of non-zeros but must have fewer than 2^31 rows and
3367   columns. This can bring significant memory and performance gains compared with a full 64-bit MUMPS build. It requires a regular (32-bit
3368   integer) build of all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS).
3369 
3370   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both the --with-64-bit-indices=0 and --with-64-bit-indices=1 variants of PETSc.
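
  For example, a configure sketch for a 64-bit-index build of PETSc that uses the selective 64-bit MUMPS build:
.vb
  ./configure --with-64-bit-indices=1 --download-mumps --download-scalapack --download-parmetis --download-metis
.ve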
3371 
3372   There are two modes to run MUMPS/PETSc with OpenMP:
3373 .vb
3374    Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want 16 OpenMP
3375    threads per rank, you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3376 .ve
3377 
3378 .vb
3379    Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3380    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16".
3381 .ve
3382 
3383    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3384    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3385    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3386    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3387    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3388 
3389    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
3390    processes on each compute node. Listing the processes in rank-ascending order, we split the processes on a node into consecutive groups of
3391    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank; the others in the omp_comm
3392    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be
3393    set by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
3394    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose your compute node has two sockets:
3395    if you interleave MPI ranks on the two sockets (that is, even ranks are placed on socket 0 and odd ranks on socket 1) and bind
3396    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half
3397    the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
3398    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3399    For example, with the Slurm job scheduler, one can use "srun --cpu-bind=verbose -m block:block" to map consecutive MPI ranks to sockets and
3400    examine the mapping result.
3401 
3402    PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3403    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts, as PETSc
3404    calls `omp_set_num_threads(m)` internally before calling MUMPS.
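
   A job-script sketch combining these suggestions (assuming the Slurm scheduler; site-specific values will differ):
.vb
   export OMP_PROC_BIND=spread
   export OMP_PLACES=threads
   srun --cpu-bind=verbose -m block:block ./test -mat_mumps_use_omp_threads 16
.ve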
3405 
3406    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3407 
3408 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3409 M*/
3410 
3411 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3412 {
3413   PetscFunctionBegin;
3414   *type = MATSOLVERMUMPS;
3415   PetscFunctionReturn(PETSC_SUCCESS);
3416 }
3417 
3418 /* MatGetFactor for Seq and MPI AIJ matrices */
3419 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3420 {
3421   Mat         B;
3422   Mat_MUMPS  *mumps;
3423   PetscBool   isSeqAIJ, isDiag, isDense;
3424   PetscMPIInt size;
3425 
3426   PetscFunctionBegin;
3427 #if defined(PETSC_USE_COMPLEX)
3428   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3429     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3430     *F = NULL;
3431     PetscFunctionReturn(PETSC_SUCCESS);
3432   }
3433 #endif
3434   /* Create the factorization matrix */
3435   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3436   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3437   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3438   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3439   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3440   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3441   PetscCall(MatSetUp(B));
3442 
3443   PetscCall(PetscNew(&mumps));
3444 
3445   B->ops->view    = MatView_MUMPS;
3446   B->ops->getinfo = MatGetInfo_MUMPS;
3447 
3448   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3449   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3450   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3451   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3452   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3453   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3454   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3455   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3456   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3457   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3458   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3459   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3460   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3461   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3462 
3463   if (ftype == MAT_FACTOR_LU) {
3464     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3465     B->factortype            = MAT_FACTOR_LU;
3466     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3467     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3468     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3469     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3470     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3471     mumps->sym = 0;
3472   } else {
3473     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3474     B->factortype                  = MAT_FACTOR_CHOLESKY;
3475     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3476     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3477     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3478     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3479     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3480 #if defined(PETSC_USE_COMPLEX)
3481     mumps->sym = 2;
3482 #else
3483     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3484     else mumps->sym = 2;
3485 #endif
3486   }
3487 
3488   /* set solvertype */
3489   PetscCall(PetscFree(B->solvertype));
3490   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3491   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3492   if (size == 1) {
3493     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3494     B->canuseordering = PETSC_TRUE;
3495   }
3496   B->ops->destroy = MatDestroy_MUMPS;
3497   B->data         = (void *)mumps;
3498 
3499   *F               = B;
3500   mumps->id.job    = JOB_NULL;
3501   mumps->ICNTL_pre = NULL;
3502   mumps->CNTL_pre  = NULL;
3503   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3504   PetscFunctionReturn(PETSC_SUCCESS);
3505 }
3506 
3507 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3508 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3509 {
3510   Mat         B;
3511   Mat_MUMPS  *mumps;
3512   PetscBool   isSeqSBAIJ;
3513   PetscMPIInt size;
3514 
3515   PetscFunctionBegin;
3516 #if defined(PETSC_USE_COMPLEX)
3517   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3518     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3519     *F = NULL;
3520     PetscFunctionReturn(PETSC_SUCCESS);
3521   }
3522 #endif
3523   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3524   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3525   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3526   PetscCall(MatSetUp(B));
3527 
3528   PetscCall(PetscNew(&mumps));
3529   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3530   if (isSeqSBAIJ) {
3531     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3532   } else {
3533     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3534   }
3535 
3536   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3537   B->ops->view                   = MatView_MUMPS;
3538   B->ops->getinfo                = MatGetInfo_MUMPS;
3539 
3540   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3541   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3542   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3543   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3544   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3545   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3546   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3547   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3548   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3549   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3550   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3551   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3552   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3553   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3554 
3555   B->factortype = MAT_FACTOR_CHOLESKY;
3556 #if defined(PETSC_USE_COMPLEX)
3557   mumps->sym = 2;
3558 #else
3559   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3560   else mumps->sym = 2;
3561 #endif
3562 
3563   /* set solvertype */
3564   PetscCall(PetscFree(B->solvertype));
3565   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3566   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3567   if (size == 1) {
3568     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3569     B->canuseordering = PETSC_TRUE;
3570   }
3571   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3572   B->ops->destroy = MatDestroy_MUMPS;
3573   B->data         = (void *)mumps;
3574 
3575   *F               = B;
3576   mumps->id.job    = JOB_NULL;
3577   mumps->ICNTL_pre = NULL;
3578   mumps->CNTL_pre  = NULL;
3579   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3580   PetscFunctionReturn(PETSC_SUCCESS);
3581 }
3582 
3583 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3584 {
3585   Mat         B;
3586   Mat_MUMPS  *mumps;
3587   PetscBool   isSeqBAIJ;
3588   PetscMPIInt size;
3589 
3590   PetscFunctionBegin;
3591   /* Create the factorization matrix */
3592   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3593   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3594   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3595   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3596   PetscCall(MatSetUp(B));
3597 
3598   PetscCall(PetscNew(&mumps));
3599   if (ftype == MAT_FACTOR_LU) {
3600     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3601     B->factortype            = MAT_FACTOR_LU;
3602     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3603     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3604     mumps->sym = 0;
3605     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3606   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3607 
3608   B->ops->view    = MatView_MUMPS;
3609   B->ops->getinfo = MatGetInfo_MUMPS;
3610 
3611   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3612   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3613   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3614   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3615   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3616   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3617   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3618   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3619   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3620   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3621   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3622   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3623   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3624   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3625 
3626   /* set solvertype */
3627   PetscCall(PetscFree(B->solvertype));
3628   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3629   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3630   if (size == 1) {
3631     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3632     B->canuseordering = PETSC_TRUE;
3633   }
3634   B->ops->destroy = MatDestroy_MUMPS;
3635   B->data         = (void *)mumps;
3636 
3637   *F               = B;
3638   mumps->id.job    = JOB_NULL;
3639   mumps->ICNTL_pre = NULL;
3640   mumps->CNTL_pre  = NULL;
3641   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3642   PetscFunctionReturn(PETSC_SUCCESS);
3643 }
3644 
3645 /* MatGetFactor for Seq and MPI SELL matrices */
3646 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3647 {
3648   Mat         B;
3649   Mat_MUMPS  *mumps;
3650   PetscBool   isSeqSELL;
3651   PetscMPIInt size;
3652 
3653   PetscFunctionBegin;
3654   /* Create the factorization matrix */
3655   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3656   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3657   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3658   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3659   PetscCall(MatSetUp(B));
3660 
3661   PetscCall(PetscNew(&mumps));
3662 
3663   B->ops->view    = MatView_MUMPS;
3664   B->ops->getinfo = MatGetInfo_MUMPS;
3665 
3666   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3667   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3668   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3669   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3670   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3671   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3672   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3673   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3674   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3675   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3676   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3677   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3678 
3679   if (ftype == MAT_FACTOR_LU) {
3680     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3681     B->factortype            = MAT_FACTOR_LU;
3682     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3683     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3684     mumps->sym = 0;
3685     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3686   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3687 
3688   /* set solvertype */
3689   PetscCall(PetscFree(B->solvertype));
3690   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3691   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3692   if (size == 1) {
3693     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3694     B->canuseordering = PETSC_TRUE;
3695   }
3696   B->ops->destroy = MatDestroy_MUMPS;
3697   B->data         = (void *)mumps;
3698 
3699   *F               = B;
3700   mumps->id.job    = JOB_NULL;
3701   mumps->ICNTL_pre = NULL;
3702   mumps->CNTL_pre  = NULL;
3703   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3704   PetscFunctionReturn(PETSC_SUCCESS);
3705 }
3706 
3707 /* MatGetFactor for MATNEST matrices */
3708 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3709 {
3710   Mat         B, **mats;
3711   Mat_MUMPS  *mumps;
3712   PetscInt    nr, nc;
3713   PetscMPIInt size;
3714   PetscBool   flg = PETSC_TRUE;
3715 
3716   PetscFunctionBegin;
3717 #if defined(PETSC_USE_COMPLEX)
3718   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3719     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3720     *F = NULL;
3721     PetscFunctionReturn(PETSC_SUCCESS);
3722   }
3723 #endif
3724 
3725   /* Return if some condition is not satisfied */
3726   *F = NULL;
3727   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3728   if (ftype == MAT_FACTOR_CHOLESKY) {
3729     IS       *rows, *cols;
3730     PetscInt *m, *M;
3731 
3732     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3733     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3734     PetscCall(MatNestGetISs(A, rows, cols));
3735     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3736     if (!flg) {
3737       PetscCall(PetscFree2(rows, cols));
3738       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3739       PetscFunctionReturn(PETSC_SUCCESS);
3740     }
3741     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3742     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3743     for (PetscInt r = 0; flg && r < nr; r++)
3744       for (PetscInt k = r + 1; flg && k < nr; k++)
3745         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3746     PetscCall(PetscFree2(m, M));
3747     PetscCall(PetscFree2(rows, cols));
3748     if (!flg) {
3749       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3750       PetscFunctionReturn(PETSC_SUCCESS);
3751     }
3752   }
3753 
3754   for (PetscInt r = 0; r < nr; r++) {
3755     for (PetscInt c = 0; c < nc; c++) {
3756       Mat       sub = mats[r][c];
3757       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
3758 
3759       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3760       PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
3761       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3762       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3763       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3764       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3765       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3766       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3767       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3768       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3769       if (ftype == MAT_FACTOR_CHOLESKY) {
3770         if (r == c) {
3771           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
3772             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3773             flg = PETSC_FALSE;
3774           }
3775         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3776           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3777           flg = PETSC_FALSE;
3778         }
3779       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3780         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3781         flg = PETSC_FALSE;
3782       }
3783     }
3784   }
3785   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3786 
3787   /* Create the factorization matrix */
3788   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3789   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3790   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3791   PetscCall(MatSetUp(B));
3792 
3793   PetscCall(PetscNew(&mumps));
3794 
3795   B->ops->view    = MatView_MUMPS;
3796   B->ops->getinfo = MatGetInfo_MUMPS;
3797 
3798   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3799   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3800   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3801   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3802   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3803   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3804   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3805   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3806   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3807   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3808   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3809   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3810   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3811   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3812 
3813   if (ftype == MAT_FACTOR_LU) {
3814     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3815     B->factortype            = MAT_FACTOR_LU;
3816     mumps->sym               = 0;
3817   } else {
3818     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3819     B->factortype                  = MAT_FACTOR_CHOLESKY;
3820 #if defined(PETSC_USE_COMPLEX)
3821     mumps->sym = 2;
3822 #else
3823     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3824     else mumps->sym = 2;
3825 #endif
3826   }
3827   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3828   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3829 
3830   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3831   if (size == 1) {
3832     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3833     B->canuseordering = PETSC_TRUE;
3834   }
3835 
3836   /* set solvertype */
3837   PetscCall(PetscFree(B->solvertype));
3838   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3839   B->ops->destroy = MatDestroy_MUMPS;
3840   B->data         = (void *)mumps;
3841 
3842   *F               = B;
3843   mumps->id.job    = JOB_NULL;
3844   mumps->ICNTL_pre = NULL;
3845   mumps->CNTL_pre  = NULL;
3846   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3847   PetscFunctionReturn(PETSC_SUCCESS);
3848 }
3849 
3850 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3851 {
3852   PetscFunctionBegin;
3853   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3854   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3855   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3856   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3857   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3858   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3859   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3860   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3861   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3862   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3863   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3864   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3865   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3866   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3867   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3868   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3869   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3870   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3871   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3872   PetscFunctionReturn(PETSC_SUCCESS);
3873 }
3874