xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision a29dfd43bb0c77e2653d3bfa2c953f902720a6d2)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14   #if defined(PETSC_USE_REAL_SINGLE)
15     #include <cmumps_c.h>
16   #else
17     #include <zmumps_c.h>
18   #endif
19 #else
20   #if defined(PETSC_USE_REAL_SINGLE)
21     #include <smumps_c.h>
22   #else
23     #include <dmumps_c.h>
24   #endif
25 #endif
26 EXTERN_C_END
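/* MUMPS JOB parameter values: -1 initializes an instance, 1 performs the analysis (symbolic factorization),
   2 the numerical factorization, 3 the solve, and -2 terminates the instance; 0 (JOB_NULL) is used in this
   file as a sentinel meaning that no MUMPS job has been run yet on the instance */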
27 #define JOB_INIT         -1
28 #define JOB_NULL         0
29 #define JOB_FACTSYMBOLIC 1
30 #define JOB_FACTNUMERIC  2
31 #define JOB_SOLVE        3
32 #define JOB_END          -2
33 
34 /* calls to MUMPS */
35 #if defined(PETSC_USE_COMPLEX)
36   #if defined(PETSC_USE_REAL_SINGLE)
37     #define MUMPS_c cmumps_c
38   #else
39     #define MUMPS_c zmumps_c
40   #endif
41 #else
42   #if defined(PETSC_USE_REAL_SINGLE)
43     #define MUMPS_c smumps_c
44   #else
45     #define MUMPS_c dmumps_c
46   #endif
47 #endif
48 
49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and int64_t for
50    numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
51    naming convention of PetscMPIInt, PetscBLASInt, etc.
52 */
53 typedef MUMPS_INT PetscMUMPSInt;
54 
55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
56   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is the more reliable test */
57     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
58   #endif
59 #else
60   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
61     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
62   #endif
63 #endif
64 
65 #define MPIU_MUMPSINT       MPI_INT
66 #define PETSC_MUMPS_INT_MAX 2147483647
67 #define PETSC_MUMPS_INT_MIN -2147483648
68 
69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
71 {
72   PetscFunctionBegin;
73 #if PetscDefined(USE_64BIT_INDICES)
74   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
75 #endif
76   *b = (PetscMUMPSInt)(a);
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
80 /* Put these utility routines here since they are only used in this file */
81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
82 {
83   PetscInt  myval;
84   PetscBool myset;
85 
86   PetscFunctionBegin;
87   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
88   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
89   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
90   if (set) *set = myset;
91   PetscFunctionReturn(PETSC_SUCCESS);
92 }
93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
94 
95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
96 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
97   #define PetscMUMPS_c(mumps) \
98     do { \
99       if (mumps->use_petsc_omp_support) { \
100         if (mumps->is_omp_master) { \
101           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
102           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
103           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
104           PetscCall(PetscFPTrapPop()); \
105           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
106         } \
107         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
108         /* Global info is the same on all processes so we Bcast it within omp_comm. Local info is specific   \
109          to each process, so we only Bcast info[1], an error code, and leave the others (since they do not   \
110          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.       \
111          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.   \
112       */ \
113         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
116         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
117       } else { \
118         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
119         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
120         PetscCall(PetscFPTrapPop()); \
121       } \
122     } while (0)
123 #else
124   #define PetscMUMPS_c(mumps) \
125     do { \
126       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
127       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
128       PetscCall(PetscFPTrapPop()); \
129     } while (0)
130 #endif
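/* PetscMUMPS_c(mumps) is how MUMPS is invoked throughout this file: callers set mumps->id.job (and any
   relevant ICNTL/CNTL entries) beforehand and check mumps->id.INFOG(1) for errors afterwards */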
131 
132 /* declare MumpsScalar */
133 #if defined(PETSC_USE_COMPLEX)
134   #if defined(PETSC_USE_REAL_SINGLE)
135     #define MumpsScalar mumps_complex
136   #else
137     #define MumpsScalar mumps_double_complex
138   #endif
139 #else
140   #define MumpsScalar PetscScalar
141 #endif
142 
143 /* macros s.t. indices match MUMPS documentation */
144 #define ICNTL(I)  icntl[(I) - 1]
145 #define CNTL(I)   cntl[(I) - 1]
146 #define INFOG(I)  infog[(I) - 1]
147 #define INFO(I)   info[(I) - 1]
148 #define RINFOG(I) rinfog[(I) - 1]
149 #define RINFO(I)  rinfo[(I) - 1]
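/* e.g., mumps->id.ICNTL(7) expands to mumps->id.icntl[6], i.e. ICNTL(7) in the MUMPS documentation */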
150 
151 typedef struct Mat_MUMPS Mat_MUMPS;
152 struct Mat_MUMPS {
153 #if defined(PETSC_USE_COMPLEX)
154   #if defined(PETSC_USE_REAL_SINGLE)
155   CMUMPS_STRUC_C id;
156   #else
157   ZMUMPS_STRUC_C id;
158   #endif
159 #else
160   #if defined(PETSC_USE_REAL_SINGLE)
161   SMUMPS_STRUC_C id;
162   #else
163   DMUMPS_STRUC_C id;
164   #endif
165 #endif
166 
167   MatStructure   matstruc;
168   PetscMPIInt    myid, petsc_size;
169   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
169   PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
170   PetscInt64     nnz;             /* number of nonzeros. MUMPS calls this type "selective 64-bit" */
172   PetscMUMPSInt  sym;
173   MPI_Comm       mumps_comm;
174   PetscMUMPSInt *ICNTL_pre;
175   PetscReal     *CNTL_pre;
176   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
177   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
178   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
179   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
180 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
181   PetscInt    *rhs_nrow, max_nrhs;
182   PetscMPIInt *rhs_recvcounts, *rhs_disps;
183   PetscScalar *rhs_loc, *rhs_recvbuf;
184 #endif
185   Vec            b_seq, x_seq;
186   PetscInt       ninfo, *info; /* which INFO to display */
187   PetscInt       sizeredrhs;
188   PetscScalar   *schur_sol;
189   PetscInt       schur_sizesol;
190   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
191   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
192   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
193 
194   /* Support for MATNEST */
195   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
196   PetscInt64  *nest_vals_start;
197   PetscScalar *nest_vals;
198 
198   /* stuff used by the PETSc/MUMPS OpenMP support */
200   PetscBool    use_petsc_omp_support;
201   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
202   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
203   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
204   PetscMPIInt  tag, omp_comm_size;
205   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
206   MPI_Request *reqs;
207 };
208 
209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
210    Here, nrow is the number of rows, ia[] is the row pointer, and ja[] is the column indices.
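   For example, the two rows {1,2} and {2} give ia[] = {1,3,4}, ja[] = {1,2,2}, and *nnz_mumps = ia[2] - 1 = 3.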
211  */
212 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
213 {
214   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the RHS */
215 
216   PetscFunctionBegin;
217 #if defined(PETSC_USE_64BIT_INDICES)
218   {
219     PetscInt i;
220     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
221       PetscCall(PetscFree(mumps->ia_alloc));
222       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
223       mumps->cur_ilen = nrow + 1;
224     }
225     if (nnz > mumps->cur_jlen) {
226       PetscCall(PetscFree(mumps->ja_alloc));
227       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
228       mumps->cur_jlen = nnz;
229     }
230     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
231     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
232     *ia_mumps = mumps->ia_alloc;
233     *ja_mumps = mumps->ja_alloc;
234   }
235 #else
236   *ia_mumps = ia;
237   *ja_mumps = ja;
238 #endif
239   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
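/* Free the MUMPS Schur-related arrays (listvar_schur, redrhs) and the cached Schur solution, and disable
   the Schur complement feature by resetting ICNTL(19) to 0 */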
243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
244 {
245   PetscFunctionBegin;
246   PetscCall(PetscFree(mumps->id.listvar_schur));
247   PetscCall(PetscFree(mumps->id.redrhs));
248   PetscCall(PetscFree(mumps->schur_sol));
249   mumps->id.size_schur = 0;
250   mumps->id.schur_lld  = 0;
251   mumps->id.ICNTL(19)  = 0;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 /* solve with rhs in mumps->id.redrhs and return in the same location */
256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
257 {
258   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
259   Mat                  S, B, X;
260   MatFactorSchurStatus schurstatus;
261   PetscInt             sizesol;
262 
263   PetscFunctionBegin;
264   PetscCall(MatFactorFactorizeSchurComplement(F));
265   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
266   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
267   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
269   PetscCall(MatBindToCPU(B, S->boundtocpu));
270 #endif
271   switch (schurstatus) {
272   case MAT_FACTOR_SCHUR_FACTORED:
273     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
274     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
276     PetscCall(MatBindToCPU(X, S->boundtocpu));
277 #endif
278     if (!mumps->id.ICNTL(9)) { /* transpose solve */
279       PetscCall(MatMatSolveTranspose(S, B, X));
280     } else {
281       PetscCall(MatMatSolve(S, B, X));
282     }
283     break;
284   case MAT_FACTOR_SCHUR_INVERTED:
285     sizesol = mumps->id.nrhs * mumps->id.size_schur;
286     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
287       PetscCall(PetscFree(mumps->schur_sol));
288       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
289       mumps->schur_sizesol = sizesol;
290     }
291     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
292     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
294     PetscCall(MatBindToCPU(X, S->boundtocpu));
295 #endif
296     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
297     if (!mumps->id.ICNTL(9)) { /* transpose solve */
298       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
299     } else {
300       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
301     }
302     PetscCall(MatProductSetFromOptions(X));
303     PetscCall(MatProductSymbolic(X));
304     PetscCall(MatProductNumeric(X));
305 
306     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
307     break;
308   default:
309     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
310   }
311   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
312   PetscCall(MatDestroy(&B));
313   PetscCall(MatDestroy(&X));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
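/* Bracket a MUMPS solve that involves a Schur complement: before the solve (expansion == PETSC_FALSE) make
   sure id.redrhs is large enough to hold the reduced right-hand sides; after the solve (expansion == PETSC_TRUE)
   solve with the Schur complement ourselves and run the MUMPS expansion phase (ICNTL(26) = 2) */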
317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
318 {
319   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
320 
321   PetscFunctionBegin;
322   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
323     PetscFunctionReturn(PETSC_SUCCESS);
324   }
325   if (!expansion) { /* prepare for the condensation step */
326     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
327     /* allocate MUMPS internal array to store reduced right-hand sides */
328     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
329       PetscCall(PetscFree(mumps->id.redrhs));
330       mumps->id.lredrhs = mumps->id.size_schur;
331       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
332       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
333     }
334   } else { /* prepare for the expansion step */
335     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
336     PetscCall(MatMumpsSolveSchur_Private(F));
337     mumps->id.ICNTL(26) = 2; /* expansion phase */
338     PetscMUMPS_c(mumps);
339     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
340     /* restore defaults */
341     mumps->id.ICNTL(26) = -1;
342     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
343     if (mumps->id.nrhs > 1) {
344       PetscCall(PetscFree(mumps->id.redrhs));
345       mumps->id.lredrhs = 0;
346       mumps->sizeredrhs = 0;
347     }
348   }
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
351 
352 /*
353   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
354 
355   input:
356     A       - matrix in aij, baij, or sbaij format
357     shift   - 0: C style output triple; 1: Fortran style output triple.
358     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
359               MAT_REUSE_MATRIX:   only the values in v array are updated
360   output:
361     nnz     - dim of r, c, and v (number of local nonzero entries of A)
362     r, c, v - row and col index, matrix values (matrix triples)
363 
364   The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc() and are later freed with
365   PetscFree(mumps->irn) in MatDestroy_MUMPS(). This is not ideal: because v is only sometimes part of
366   mumps->irn, the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
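  A small worked example with shift = 1 (Fortran-style indices): the 2x3 AIJ matrix
      [1 0 2]
      [0 3 4]
  yields nnz = 4, r = {1,1,2,2}, c = {1,3,2,3}, v = {1,2,3,4}.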
367 
368  */
369 
370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
371 {
372   const PetscScalar *av;
373   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
374   PetscInt64         nz, rnz, i, j, k;
375   PetscMUMPSInt     *row, *col;
376   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
377 
378   PetscFunctionBegin;
379   PetscCall(MatSeqAIJGetArrayRead(A, &av));
380   if (reuse == MAT_INITIAL_MATRIX) {
381     nz = aa->nz;
382     ai = aa->i;
383     aj = aa->j;
384     PetscCall(PetscMalloc2(nz, &row, nz, &col));
385     for (i = k = 0; i < M; i++) {
386       rnz = ai[i + 1] - ai[i];
387       ajj = aj + ai[i];
388       for (j = 0; j < rnz; j++) {
389         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
390         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
391         k++;
392       }
393     }
394     mumps->val = (PetscScalar *)av;
395     mumps->irn = row;
396     mumps->jcn = col;
397     mumps->nnz = nz;
398   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
399   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
400   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
401   PetscFunctionReturn(PETSC_SUCCESS);
402 }
403 
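/* Convert a SeqSELL matrix to triples: row indices are reconstructed from the slice structure (slices of
   8 rows), while the stored column index and value arrays are used directly */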
404 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
405 {
406   PetscInt64     nz, i, j, k, r;
407   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
408   PetscMUMPSInt *row, *col;
409 
410   PetscFunctionBegin;
411   nz = a->sliidx[a->totalslices];
412   if (reuse == MAT_INITIAL_MATRIX) {
413     PetscCall(PetscMalloc2(nz, &row, nz, &col));
414     for (i = k = 0; i < a->totalslices; i++) {
415       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
416     }
417     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
418     mumps->irn = row;
419     mumps->jcn = col;
420     mumps->nnz = nz;
421     mumps->val = a->val;
422   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
423   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
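/* Convert a SeqBAIJ matrix to triples by expanding each bs x bs block into scalar entries; the stored
   value array can be used directly since the blocks are stored densely */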
427 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
428 {
429   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
430   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
431   PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
432   PetscInt        bs;
433   PetscMUMPSInt  *row, *col;
434 
435   PetscFunctionBegin;
436   if (reuse == MAT_INITIAL_MATRIX) {
437     PetscCall(MatGetBlockSize(A, &bs));
438     M  = A->rmap->N / bs;
439     ai = aa->i;
440     aj = aa->j;
441     PetscCall(PetscMalloc2(nz, &row, nz, &col));
442     for (i = 0; i < M; i++) {
443       ajj = aj + ai[i];
444       rnz = ai[i + 1] - ai[i];
445       for (k = 0; k < rnz; k++) {
446         for (j = 0; j < bs; j++) {
447           for (m = 0; m < bs; m++) {
448             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
449             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
450             idx++;
451           }
452         }
453       }
454     }
455     mumps->irn = row;
456     mumps->jcn = col;
457     mumps->nnz = nz;
458     mumps->val = aa->a;
459   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
460   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
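/* Convert a SeqSBAIJ matrix to triples; only entries in the upper triangular part are passed to MUMPS.
   For bs > 1 the needed entries of each block are copied into a separate buffer (val_alloc); for bs == 1
   the stored values can be used directly */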
464 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
465 {
466   const PetscInt *ai, *aj, *ajj;
467   PetscInt        bs;
468   PetscInt64      nz, rnz, i, j, k, m;
469   PetscMUMPSInt  *row, *col;
470   PetscScalar    *val;
471   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
472   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
473 #if defined(PETSC_USE_COMPLEX)
474   PetscBool isset, hermitian;
475 #endif
476 
477   PetscFunctionBegin;
478 #if defined(PETSC_USE_COMPLEX)
479   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
480   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
481 #endif
482   ai = aa->i;
483   aj = aa->j;
484   PetscCall(MatGetBlockSize(A, &bs));
485   if (reuse == MAT_INITIAL_MATRIX) {
486     const PetscInt64 alloc_size = aa->nz * bs2;
487 
488     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
489     if (bs > 1) {
490       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
491       mumps->val = mumps->val_alloc;
492     } else {
493       mumps->val = aa->a;
494     }
495     mumps->irn = row;
496     mumps->jcn = col;
497   } else {
498     row = mumps->irn;
499     col = mumps->jcn;
500   }
501   val = mumps->val;
502 
503   nz = 0;
504   if (bs > 1) {
505     for (i = 0; i < mbs; i++) {
506       rnz = ai[i + 1] - ai[i];
507       ajj = aj + ai[i];
508       for (j = 0; j < rnz; j++) {
509         for (k = 0; k < bs; k++) {
510           for (m = 0; m < bs; m++) {
511             if (ajj[j] > i || k >= m) {
512               if (reuse == MAT_INITIAL_MATRIX) {
513                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
514                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
515               }
516               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
517             }
518           }
519         }
520       }
521     }
522   } else if (reuse == MAT_INITIAL_MATRIX) {
523     for (i = 0; i < mbs; i++) {
524       rnz = ai[i + 1] - ai[i];
525       ajj = aj + ai[i];
526       for (j = 0; j < rnz; j++) {
527         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
528         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
529         nz++;
530       }
531     }
532     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
533   } else if (mumps->nest_vals)
534     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
535   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
536   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
537   PetscFunctionReturn(PETSC_SUCCESS);
538 }
539 
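/* Convert a SeqAIJ matrix to triples containing only its upper triangular part (diagonal included), as
   needed for a Cholesky factorization with MUMPS; the values are always copied into a separate buffer */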
540 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
541 {
542   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
543   PetscInt64         nz, rnz, i, j;
544   const PetscScalar *av, *v1;
545   PetscScalar       *val;
546   PetscMUMPSInt     *row, *col;
547   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
548   PetscBool          missing;
549 #if defined(PETSC_USE_COMPLEX)
550   PetscBool hermitian, isset;
551 #endif
552 
553   PetscFunctionBegin;
554 #if defined(PETSC_USE_COMPLEX)
555   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
556   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
557 #endif
558   PetscCall(MatSeqAIJGetArrayRead(A, &av));
559   ai    = aa->i;
560   aj    = aa->j;
561   adiag = aa->diag;
562   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
563   if (reuse == MAT_INITIAL_MATRIX) {
564     /* count nz in the upper triangular part of A */
565     nz = 0;
566     if (missing) {
567       for (i = 0; i < M; i++) {
568         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
569           for (j = ai[i]; j < ai[i + 1]; j++) {
570             if (aj[j] < i) continue;
571             nz++;
572           }
573         } else {
574           nz += ai[i + 1] - adiag[i];
575         }
576       }
577     } else {
578       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
579     }
580     PetscCall(PetscMalloc2(nz, &row, nz, &col));
581     PetscCall(PetscMalloc1(nz, &val));
582     mumps->nnz = nz;
583     mumps->irn = row;
584     mumps->jcn = col;
585     mumps->val = mumps->val_alloc = val;
586 
587     nz = 0;
588     if (missing) {
589       for (i = 0; i < M; i++) {
590         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
591           for (j = ai[i]; j < ai[i + 1]; j++) {
592             if (aj[j] < i) continue;
593             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
594             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
595             val[nz] = av[j];
596             nz++;
597           }
598         } else {
599           rnz = ai[i + 1] - adiag[i];
600           ajj = aj + adiag[i];
601           v1  = av + adiag[i];
602           for (j = 0; j < rnz; j++) {
603             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
604             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
605             val[nz++] = v1[j];
606           }
607         }
608       }
609     } else {
610       for (i = 0; i < M; i++) {
611         rnz = ai[i + 1] - adiag[i];
612         ajj = aj + adiag[i];
613         v1  = av + adiag[i];
614         for (j = 0; j < rnz; j++) {
615           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
616           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
617           val[nz++] = v1[j];
618         }
619       }
620     }
621   } else {
622     nz  = 0;
623     val = mumps->val;
624     if (missing) {
625       for (i = 0; i < M; i++) {
626         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
627           for (j = ai[i]; j < ai[i + 1]; j++) {
628             if (aj[j] < i) continue;
629             val[nz++] = av[j];
630           }
631         } else {
632           rnz = ai[i + 1] - adiag[i];
633           v1  = av + adiag[i];
634           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
635         }
636       }
637     } else {
638       for (i = 0; i < M; i++) {
639         rnz = ai[i + 1] - adiag[i];
640         v1  = av + adiag[i];
641         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
642       }
643     }
644   }
645   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
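/* Convert an MPISBAIJ matrix to triples of its upper triangular part, merging the diagonal (A) and
   off-diagonal (B) blocks; garray[] maps B's local column indices back to global columns. The allocation
   is a conservative upper bound and mumps->nnz is only known once the loop has completed */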
649 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
650 {
651   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
652   PetscInt           bs;
653   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
654   PetscMUMPSInt     *row, *col;
655   const PetscScalar *av, *bv, *v1, *v2;
656   PetscScalar       *val;
657   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
658   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
659   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
660   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
661 #if defined(PETSC_USE_COMPLEX)
662   PetscBool hermitian, isset;
663 #endif
664 
665   PetscFunctionBegin;
666 #if defined(PETSC_USE_COMPLEX)
667   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
668   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
669 #endif
670   PetscCall(MatGetBlockSize(A, &bs));
671   rstart = A->rmap->rstart;
672   ai     = aa->i;
673   aj     = aa->j;
674   bi     = bb->i;
675   bj     = bb->j;
676   av     = aa->a;
677   bv     = bb->a;
678 
679   garray = mat->garray;
680 
681   if (reuse == MAT_INITIAL_MATRIX) {
682     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
683     PetscCall(PetscMalloc2(nz, &row, nz, &col));
684     PetscCall(PetscMalloc1(nz, &val));
685     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
686     mumps->irn = row;
687     mumps->jcn = col;
688     mumps->val = mumps->val_alloc = val;
689   } else {
690     val = mumps->val;
691   }
692 
693   jj   = 0;
694   irow = rstart;
695   for (i = 0; i < mbs; i++) {
696     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
697     countA = ai[i + 1] - ai[i];
698     countB = bi[i + 1] - bi[i];
699     bjj    = bj + bi[i];
700     v1     = av + ai[i] * bs2;
701     v2     = bv + bi[i] * bs2;
702 
703     if (bs > 1) {
704       /* A-part */
705       for (j = 0; j < countA; j++) {
706         for (k = 0; k < bs; k++) {
707           for (m = 0; m < bs; m++) {
708             if (rstart + ajj[j] * bs > irow || k >= m) {
709               if (reuse == MAT_INITIAL_MATRIX) {
710                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
711                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
712               }
713               val[jj++] = v1[j * bs2 + m + k * bs];
714             }
715           }
716         }
717       }
718 
719       /* B-part */
720       for (j = 0; j < countB; j++) {
721         for (k = 0; k < bs; k++) {
722           for (m = 0; m < bs; m++) {
723             if (reuse == MAT_INITIAL_MATRIX) {
724               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
725               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
726             }
727             val[jj++] = v2[j * bs2 + m + k * bs];
728           }
729         }
730       }
731     } else {
732       /* A-part */
733       for (j = 0; j < countA; j++) {
734         if (reuse == MAT_INITIAL_MATRIX) {
735           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
736           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
737         }
738         val[jj++] = v1[j];
739       }
740 
741       /* B-part */
742       for (j = 0; j < countB; j++) {
743         if (reuse == MAT_INITIAL_MATRIX) {
744           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
745           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
746         }
747         val[jj++] = v2[j];
748       }
749     }
750     irow += bs;
751   }
752   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
753   PetscFunctionReturn(PETSC_SUCCESS);
754 }
755 
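/* Convert an MPIAIJ matrix to triples: for each local row, entries of the diagonal block (Ad) and the
   off-diagonal block (Ao) are appended, with garray[] translating Ao's local column indices to global ones */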
756 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
757 {
758   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
759   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
760   PetscMUMPSInt     *row, *col;
761   const PetscScalar *av, *bv, *v1, *v2;
762   PetscScalar       *val;
763   Mat                Ad, Ao;
764   Mat_SeqAIJ        *aa;
765   Mat_SeqAIJ        *bb;
766 
767   PetscFunctionBegin;
768   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
769   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
770   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
771 
772   aa = (Mat_SeqAIJ *)(Ad)->data;
773   bb = (Mat_SeqAIJ *)(Ao)->data;
774   ai = aa->i;
775   aj = aa->j;
776   bi = bb->i;
777   bj = bb->j;
778 
779   rstart = A->rmap->rstart;
780   cstart = A->cmap->rstart;
781 
782   if (reuse == MAT_INITIAL_MATRIX) {
783     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
784     PetscCall(PetscMalloc2(nz, &row, nz, &col));
785     PetscCall(PetscMalloc1(nz, &val));
786     mumps->nnz = nz;
787     mumps->irn = row;
788     mumps->jcn = col;
789     mumps->val = mumps->val_alloc = val;
790   } else {
791     val = mumps->val;
792   }
793 
794   jj   = 0;
795   irow = rstart;
796   for (i = 0; i < m; i++) {
797     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
798     countA = ai[i + 1] - ai[i];
799     countB = bi[i + 1] - bi[i];
800     bjj    = bj + bi[i];
801     v1     = av + ai[i];
802     v2     = bv + bi[i];
803 
804     /* A-part */
805     for (j = 0; j < countA; j++) {
806       if (reuse == MAT_INITIAL_MATRIX) {
807         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
808         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
809       }
810       val[jj++] = v1[j];
811     }
812 
813     /* B-part */
814     for (j = 0; j < countB; j++) {
815       if (reuse == MAT_INITIAL_MATRIX) {
816         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
817         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
818       }
819       val[jj++] = v2[j];
820     }
821     irow++;
822   }
823   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
824   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
825   PetscFunctionReturn(PETSC_SUCCESS);
826 }
827 
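/* Convert an MPIBAIJ matrix to triples by expanding every bs x bs block of the diagonal (A) and
   off-diagonal (B) parts into scalar entries */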
828 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
829 {
830   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
831   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
832   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
833   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
834   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
835   const PetscInt     bs2 = mat->bs2;
836   PetscInt           bs;
837   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
838   PetscMUMPSInt     *row, *col;
839   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
840   PetscScalar       *val;
841 
842   PetscFunctionBegin;
843   PetscCall(MatGetBlockSize(A, &bs));
844   if (reuse == MAT_INITIAL_MATRIX) {
845     nz = bs2 * (aa->nz + bb->nz);
846     PetscCall(PetscMalloc2(nz, &row, nz, &col));
847     PetscCall(PetscMalloc1(nz, &val));
848     mumps->nnz = nz;
849     mumps->irn = row;
850     mumps->jcn = col;
851     mumps->val = mumps->val_alloc = val;
852   } else {
853     val = mumps->val;
854   }
855 
856   jj   = 0;
857   irow = rstart;
858   for (i = 0; i < mbs; i++) {
859     countA = ai[i + 1] - ai[i];
860     countB = bi[i + 1] - bi[i];
861     ajj    = aj + ai[i];
862     bjj    = bj + bi[i];
863     v1     = av + bs2 * ai[i];
864     v2     = bv + bs2 * bi[i];
865 
866     idx = 0;
867     /* A-part */
868     for (k = 0; k < countA; k++) {
869       for (j = 0; j < bs; j++) {
870         for (n = 0; n < bs; n++) {
871           if (reuse == MAT_INITIAL_MATRIX) {
872             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
873             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
874           }
875           val[jj++] = v1[idx++];
876         }
877       }
878     }
879 
880     idx = 0;
881     /* B-part */
882     for (k = 0; k < countB; k++) {
883       for (j = 0; j < bs; j++) {
884         for (n = 0; n < bs; n++) {
885           if (reuse == MAT_INITIAL_MATRIX) {
886             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
887             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
888           }
889           val[jj++] = v2[idx++];
890         }
891       }
892     }
893     irow += bs;
894   }
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
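/* Convert an MPIAIJ matrix to triples of its upper triangular part for Cholesky: the diagonal block
   contributes the entries at and beyond the diagonal, and the off-diagonal block only the columns whose
   global index is larger than the local row start (garray[bjj[j]] > rstart) */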
898 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
899 {
900   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
901   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
902   PetscMUMPSInt     *row, *col;
903   const PetscScalar *av, *bv, *v1, *v2;
904   PetscScalar       *val;
905   Mat                Ad, Ao;
906   Mat_SeqAIJ        *aa;
907   Mat_SeqAIJ        *bb;
908 #if defined(PETSC_USE_COMPLEX)
909   PetscBool hermitian, isset;
910 #endif
911 
912   PetscFunctionBegin;
913 #if defined(PETSC_USE_COMPLEX)
914   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
915   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
916 #endif
917   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
918   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
919   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
920 
921   aa    = (Mat_SeqAIJ *)(Ad)->data;
922   bb    = (Mat_SeqAIJ *)(Ao)->data;
923   ai    = aa->i;
924   aj    = aa->j;
925   adiag = aa->diag;
926   bi    = bb->i;
927   bj    = bb->j;
928 
929   rstart = A->rmap->rstart;
930 
931   if (reuse == MAT_INITIAL_MATRIX) {
932     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
933     nzb = 0; /* num of upper triangular entries in mat->B */
934     for (i = 0; i < m; i++) {
935       nza += (ai[i + 1] - adiag[i]);
936       countB = bi[i + 1] - bi[i];
937       bjj    = bj + bi[i];
938       for (j = 0; j < countB; j++) {
939         if (garray[bjj[j]] > rstart) nzb++;
940       }
941     }
942 
943     nz = nza + nzb; /* total nz of upper triangular part of mat */
944     PetscCall(PetscMalloc2(nz, &row, nz, &col));
945     PetscCall(PetscMalloc1(nz, &val));
946     mumps->nnz = nz;
947     mumps->irn = row;
948     mumps->jcn = col;
949     mumps->val = mumps->val_alloc = val;
950   } else {
951     val = mumps->val;
952   }
953 
954   jj   = 0;
955   irow = rstart;
956   for (i = 0; i < m; i++) {
957     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
958     v1     = av + adiag[i];
959     countA = ai[i + 1] - adiag[i];
960     countB = bi[i + 1] - bi[i];
961     bjj    = bj + bi[i];
962     v2     = bv + bi[i];
963 
964     /* A-part */
965     for (j = 0; j < countA; j++) {
966       if (reuse == MAT_INITIAL_MATRIX) {
967         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
968         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
969       }
970       val[jj++] = v1[j];
971     }
972 
973     /* B-part */
974     for (j = 0; j < countB; j++) {
975       if (garray[bjj[j]] > rstart) {
976         if (reuse == MAT_INITIAL_MATRIX) {
977           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
978           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
979         }
980         val[jj++] = v2[j];
981       }
982     }
983     irow++;
984   }
985   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
986   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
987   PetscFunctionReturn(PETSC_SUCCESS);
988 }
989 
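/* Convert a MATDIAGONAL matrix to triples: one (i, i, d_i) entry per local row, with the values taken
   directly from the diagonal vector */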
990 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
991 {
992   const PetscScalar *av;
993   const PetscInt     M = A->rmap->n;
994   PetscInt64         i;
995   PetscMUMPSInt     *row, *col;
996   Vec                v;
997 
998   PetscFunctionBegin;
999   PetscCall(MatDiagonalGetDiagonal(A, &v));
1000   PetscCall(VecGetArrayRead(v, &av));
1001   if (reuse == MAT_INITIAL_MATRIX) {
1002     PetscCall(PetscMalloc2(M, &row, M, &col));
1003     for (i = 0; i < M; i++) {
1004       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1005       col[i] = row[i];
1006     }
1007     mumps->val = (PetscScalar *)av;
1008     mumps->irn = row;
1009     mumps->jcn = col;
1010     mumps->nnz = M;
1011   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1012   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1013   PetscCall(VecRestoreArrayRead(v, &av));
1014   PetscFunctionReturn(PETSC_SUCCESS);
1015 }
1016 
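/* Convert a dense matrix to triples, treating every one of the m x N local entries as a nonzero; when the
   leading dimension differs from m (or on the MATNEST reuse path), the values are copied column by column
   into a contiguous buffer */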
1017 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1018 {
1019   PetscScalar   *v;
1020   const PetscInt m = A->rmap->n, N = A->cmap->N;
1021   PetscInt       lda;
1022   PetscInt64     i, j;
1023   PetscMUMPSInt *row, *col;
1024 
1025   PetscFunctionBegin;
1026   PetscCall(MatDenseGetArray(A, &v));
1027   PetscCall(MatDenseGetLDA(A, &lda));
1028   if (reuse == MAT_INITIAL_MATRIX) {
1029     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1030     for (i = 0; i < m; i++) {
1031       col[i] = 0;
1032       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1033     }
1034     for (j = 1; j < N; j++) {
1035       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1036       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1037     }
1038     if (lda == m) mumps->val = v;
1039     else {
1040       PetscCall(PetscMalloc1(m * N, &mumps->val));
1041       mumps->val_alloc = mumps->val;
1042       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1043     }
1044     mumps->irn = row;
1045     mumps->jcn = col;
1046     mumps->nnz = m * N;
1047   } else {
1048     if (lda == m && !mumps->nest_vals) mumps->val = v;
1049     else {
1050       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1051     }
1052   }
1053   PetscCall(MatDenseRestoreArray(A, &v));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
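/* Convert a MATNEST matrix to triples: each nonempty block is converted with the routine matching its type,
   its local indices are remapped to the global ordering of the nest (using the nest ISs and an SF for
   off-process columns), and the per-block COO triples are concatenated into one array */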
1057 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1058 {
1059   Mat     **mats;
1060   PetscInt  nr, nc;
1061   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1062 
1063   PetscFunctionBegin;
1064   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1065   if (reuse == MAT_INITIAL_MATRIX) {
1066     PetscMUMPSInt *irns, *jcns;
1067     PetscScalar   *vals;
1068     PetscInt64     totnnz, cumnnz, maxnnz;
1069     PetscInt      *pjcns_w;
1070     IS            *rows, *cols;
1071     PetscInt     **rows_idx, **cols_idx;
1072 
1073     cumnnz = 0;
1074     maxnnz = 0;
1075     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1076     for (PetscInt r = 0; r < nr; r++) {
1077       for (PetscInt c = 0; c < nc; c++) {
1078         Mat sub = mats[r][c];
1079 
1080         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1081         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1082         if (sub) {
1083           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1084           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
1085           MatInfo   info;
1086 
1087           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1088           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1089           else {
1090             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1091             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1092           }
1093           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1094           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1095           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1096           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1097           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1098           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1099           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1100           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1101 
1102           if (chol) {
1103             if (r == c) {
1104               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1105               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1106               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1107               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1108               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1109               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1110             } else {
1111               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1112               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1113               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1114               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1115               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1116               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1117             }
1118           } else {
1119             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1120             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1121             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1122             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1123             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1124             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1125           }
1126           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1127           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1128           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1129           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1130           maxnnz = PetscMax(maxnnz, info.nz_used);
1131         }
1132       }
1133     }
1134 
1135     /* Allocate total COO */
1136     totnnz = cumnnz;
1137     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1138     PetscCall(PetscMalloc1(totnnz, &vals));
1139 
1140     /* Handle row and column maps
1141        We directly map rows and use an SF for the columns */
1142     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1143     PetscCall(MatNestGetISs(A, rows, cols));
1144     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1145     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1146     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1147     else (void)maxnnz;
1148 
1149     cumnnz = 0;
1150     for (PetscInt r = 0; r < nr; r++) {
1151       for (PetscInt c = 0; c < nc; c++) {
1152         Mat             sub  = mats[r][c];
1153         const PetscInt *ridx = rows_idx[r];
1154         const PetscInt *cidx = cols_idx[c];
1155         PetscInt        rst;
1156         PetscSF         csf;
1157         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1158         PetscLayout     cmap;
1159 
1160         mumps->nest_vals_start[r * nc + c] = cumnnz;
1161         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1162 
1163         /* Extract inner blocks if needed */
1164         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1165         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1166         else {
1167           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1168           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1169         }
1170         swap = (PetscBool)(isTrans || isHTrans);
1171 
1172         /* Get column layout to map off-process columns */
1173         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1174 
1175         /* Get row start to map on-process rows */
1176         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1177 
1178         /* Directly use the MUMPS data structure and use C ordering for now */
1179         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1180 
1181         /* Swap the roles of the row and column indices for transposed blocks
1182            since we need values in the final global ordering */
1183         if (swap) {
1184           cidx = rows_idx[r];
1185           ridx = cols_idx[c];
1186         }
1187 
1188         /* Communicate column indices
1189            This could have been done with a single SF, but it would have complicated the code a lot.
1190            Since we do it only once, we pay the price of setting up an SF for each block */
1191         if (PetscDefined(USE_64BIT_INDICES)) {
1192           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1193         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings in 64-bit integer builds */
1194         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1195         PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1196         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1197         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1198         PetscCall(PetscSFDestroy(&csf));
1199 
1200         /* Import indices: use direct map for rows and mapped indices for columns */
1201         if (swap) {
1202           for (PetscInt k = 0; k < mumps->nnz; k++) {
1203             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1204             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1205           }
1206         } else {
1207           for (PetscInt k = 0; k < mumps->nnz; k++) {
1208             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1209             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1210           }
1211         }
1212 
1213         /* Import values to full COO */
1214         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1215         if (isHTrans) { /* conjugate the entries */
1216           PetscScalar *v = vals + cumnnz;
1217           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
1218         }
1219 
1220         /* Shift new starting point and sanity check */
1221         cumnnz += mumps->nnz;
1222         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1223 
1224         /* Free scratch memory */
1225         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1226         PetscCall(PetscFree(mumps->val_alloc));
1227         mumps->val = NULL;
1228         mumps->nnz = 0;
1229       }
1230     }
1231     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1232     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1233     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1234     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1235     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1236     mumps->nest_vals_start[nr * nc] = cumnnz;
1237 
1238     /* Set pointers for final MUMPS data structure */
1239     mumps->nest_vals = vals;
1240     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1241     mumps->val       = vals;
1242     mumps->irn       = irns;
1243     mumps->jcn       = jcns;
1244     mumps->nnz       = cumnnz;
1245   } else {
1246     PetscScalar *oval = mumps->nest_vals;
1247     for (PetscInt r = 0; r < nr; r++) {
1248       for (PetscInt c = 0; c < nc; c++) {
1249         PetscBool isTrans, isHTrans = PETSC_FALSE;
1250         Mat       sub  = mats[r][c];
1251         PetscInt  midx = r * nc + c;
1252 
1253         if (!mumps->nest_convert_to_triples[midx]) continue;
1254         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1255         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1256         else {
1257           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1258           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1259         }
1260         mumps->val = oval + mumps->nest_vals_start[midx];
1261         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1262         if (isHTrans) {
1263           PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1264           for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1265         }
1266       }
1267     }
1268     mumps->val = oval;
1269   }
1270   PetscFunctionReturn(PETSC_SUCCESS);
1271 }
1272 
1273 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1274 {
1275   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1276 
1277   PetscFunctionBegin;
1278   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1279   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1280   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1281   PetscCall(VecDestroy(&mumps->b_seq));
1282   PetscCall(VecDestroy(&mumps->x_seq));
1283   PetscCall(PetscFree(mumps->id.perm_in));
1284   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1285   PetscCall(PetscFree(mumps->val_alloc));
1286   PetscCall(PetscFree(mumps->info));
1287   PetscCall(PetscFree(mumps->ICNTL_pre));
1288   PetscCall(PetscFree(mumps->CNTL_pre));
1289   PetscCall(MatMumpsResetSchur_Private(mumps));
1290   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1291     mumps->id.job = JOB_END;
1292     PetscMUMPS_c(mumps);
1293     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1294     if (mumps->mumps_comm != MPI_COMM_NULL) {
1295       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1296       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1297     }
1298   }
1299 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1300   if (mumps->use_petsc_omp_support) {
1301     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1302     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1303     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1304   }
1305 #endif
1306   PetscCall(PetscFree(mumps->ia_alloc));
1307   PetscCall(PetscFree(mumps->ja_alloc));
1308   PetscCall(PetscFree(mumps->recvcount));
1309   PetscCall(PetscFree(mumps->reqs));
1310   PetscCall(PetscFree(mumps->irhs_loc));
1311   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1312   PetscCall(PetscFree(mumps->nest_vals));
1313   PetscCall(PetscFree(A->data));
1314 
1315   /* clear composed functions */
1316   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1317   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1318   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1321   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1322   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1323   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1324   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1325   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1326   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1327   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1328   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1329   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1330   PetscFunctionReturn(PETSC_SUCCESS);
1331 }
1332 
1333 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
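/* Two cases are handled below:
   - omp_comm_size == 1: the caller's local rows go to MUMPS directly through id.rhs_loc, with irhs_loc[] holding the
     1-based global row indices owned by this rank.
   - omp_comm_size > 1 (several PETSc ranks folded into one MUMPS rank via PetscOmpCtrl): the OMP master gathers the
     RHS entries of the ranks in its omp_comm with MPI_Gatherv and, for nrhs > 1, re-arranges them so that each column
     of the merged local RHS block is contiguous, matching the column-major rhs_loc layout MUMPS expects. */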
1334 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1335 {
1336   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1337   const PetscMPIInt ompsize = mumps->omp_comm_size;
1338   PetscInt          i, m, M, rstart;
1339 
1340   PetscFunctionBegin;
1341   PetscCall(MatGetSize(A, &M, NULL));
1342   PetscCall(MatGetLocalSize(A, &m, NULL));
1343   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1344   if (ompsize == 1) {
1345     if (!mumps->irhs_loc) {
1346       mumps->nloc_rhs = m;
1347       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1348       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1349       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1350     }
1351     mumps->id.rhs_loc = (MumpsScalar *)array;
1352   } else {
1353 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1354     const PetscInt *ranges;
1355     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1356     MPI_Group       petsc_group, omp_group;
1357     PetscScalar    *recvbuf = NULL;
1358 
1359     if (mumps->is_omp_master) {
1360       /* Lazily initialize the omp stuff for distributed rhs */
1361       if (!mumps->irhs_loc) {
1362         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1363         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1364         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1365         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1366         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1367         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1368 
1369         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1370         mumps->nloc_rhs = 0;
1371         PetscCall(MatGetOwnershipRanges(A, &ranges));
1372         for (j = 0; j < ompsize; j++) {
1373           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1374           mumps->nloc_rhs += mumps->rhs_nrow[j];
1375         }
1376         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1377         for (j = k = 0; j < ompsize; j++) {
1378           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1379         }
1380 
1381         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1382         PetscCallMPI(MPI_Group_free(&petsc_group));
1383         PetscCallMPI(MPI_Group_free(&omp_group));
1384       }
1385 
1386       /* Reallocate buffers when the current nrhs is bigger than the largest we have seen so far */
1387       if (nrhs > mumps->max_nrhs) {
1388         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1389         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1390         mumps->max_nrhs = nrhs;
1391       }
1392 
1393       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1394       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1395       mumps->rhs_disps[0] = 0;
1396       for (j = 1; j < ompsize; j++) {
1397         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1398         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1399       }
1400       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1401     }
1402 
1403     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1404     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1405 
1406     if (mumps->is_omp_master) {
1407       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
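        /* Illustration (not from the MUMPS documentation): with two ranks in omp_comm owning 2 and 3 rows and nrhs = 2,
           rhs_recvbuf arrives rank-blocked as [rank0 col0 | rank0 col1 | rank1 col0 | rank1 col1]. The loop below
           interleaves these blocks into rhs_loc as [col0 of all 5 merged rows | col1 of all 5 merged rows], i.e. the
           column-major layout (leading dimension nloc_rhs) that MUMPS expects for rhs_loc. */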
1408         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1409         for (j = 0; j < ompsize; j++) {
1410           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1411           dst                    = dstbase;
1412           for (i = 0; i < nrhs; i++) {
1413             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1414             src += mumps->rhs_nrow[j];
1415             dst += mumps->nloc_rhs;
1416           }
1417           dstbase += mumps->rhs_nrow[j];
1418         }
1419       }
1420       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1421     }
1422 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1423   }
1424   mumps->id.nrhs     = nrhs;
1425   mumps->id.nloc_rhs = mumps->nloc_rhs;
1426   mumps->id.lrhs_loc = mumps->nloc_rhs;
1427   mumps->id.irhs_loc = mumps->irhs_loc;
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
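/* Solve A x = b with the MUMPS factor stored in A->data. On one rank the RHS is passed to MUMPS in place; in parallel
   the RHS is either handed to MUMPS as a distributed dense RHS (ICNTL(20)=10) or scattered to the host (ICNTL(20)=0),
   and the distributed MUMPS solution (ICNTL(21)=1) is scattered back into the PETSc vector x. If a Schur complement
   was requested, the condensation/expansion steps are handled through MatMumpsHandleSchur_Private(). */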
1431 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1432 {
1433   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1434   const PetscScalar *rarray = NULL;
1435   PetscScalar       *array;
1436   IS                 is_iden, is_petsc;
1437   PetscInt           i;
1438   PetscBool          second_solve = PETSC_FALSE;
1439   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1440 
1441   PetscFunctionBegin;
1442   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1443                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1444                                    &cite1));
1445   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1446                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1447                                    &cite2));
1448 
1449   PetscCall(VecFlag(x, A->factorerrortype));
1450   if (A->factorerrortype) {
1451     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1452     PetscFunctionReturn(PETSC_SUCCESS);
1453   }
1454 
1455   mumps->id.nrhs = 1;
1456   if (mumps->petsc_size > 1) {
1457     if (mumps->ICNTL20 == 10) {
1458       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1459       PetscCall(VecGetArrayRead(b, &rarray));
1460       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1461     } else {
1462       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1463       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1464       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1465       if (!mumps->myid) {
1466         PetscCall(VecGetArray(mumps->b_seq, &array));
1467         mumps->id.rhs = (MumpsScalar *)array;
1468       }
1469     }
1470   } else {                   /* petsc_size == 1 */
1471     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1472     PetscCall(VecCopy(b, x));
1473     PetscCall(VecGetArray(x, &array));
1474     mumps->id.rhs = (MumpsScalar *)array;
1475   }
1476 
1477   /*
1478      handle condensation step of Schur complement (if any)
1479      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1480      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1481      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve solve the full system.
1482      This requires an extra call to PetscMUMPS_c and the computation of the factors of S
1483   */
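  /* A sketch of the typical user-level Schur path (assumed usage, not code from this file; declarations and error
     checking omitted):

       KSPSetType(ksp, KSPPREONLY);
       KSPGetPC(ksp, &pc);
       PCSetType(pc, PCLU);
       PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
       PCFactorSetUpMatSolverType(pc);              // create the factor matrix before KSPSetUp()
       PCFactorGetMatrix(pc, &F);
       MatFactorSetSchurIS(F, is_schur);            // is_schur: indices of the Schur variables
       KSPSetUp(ksp);
       KSPSolve(ksp, b, x);                         // full-system solve, as described above
       MatFactorCreateSchurComplement(F, &S, NULL); // dense Schur complement, if needed
  */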
1484   if (mumps->id.size_schur > 0) {
1485     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1486     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1487       second_solve = PETSC_TRUE;
1488       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1489       mumps->id.ICNTL(26) = 1; /* condensation phase */
1490     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1491   }
1492   /* solve phase */
1493   mumps->id.job = JOB_SOLVE;
1494   PetscMUMPS_c(mumps);
1495   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1496 
1497   /* handle expansion step of Schur complement (if any) */
1498   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1499   else if (mumps->id.ICNTL(26) == 1) {
1500     PetscCall(MatMumpsSolveSchur_Private(A));
1501     for (i = 0; i < mumps->id.size_schur; ++i) {
1502 #if !defined(PETSC_USE_COMPLEX)
1503       PetscScalar val = mumps->id.redrhs[i];
1504 #else
1505       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1506 #endif
1507       array[mumps->id.listvar_schur[i] - 1] = val;
1508     }
1509   }
1510 
1511   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1512     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1513       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its length lsol_loc), so recreate scat_sol */
1514       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1515     }
1516     if (!mumps->scat_sol) { /* create scatter scat_sol */
1517       PetscInt *isol2_loc = NULL;
1518       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1519       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1520       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1521       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1522       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1523       PetscCall(ISDestroy(&is_iden));
1524       PetscCall(ISDestroy(&is_petsc));
1525       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1526     }
1527 
1528     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1529     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1530   }
1531 
1532   if (mumps->petsc_size > 1) {
1533     if (mumps->ICNTL20 == 10) {
1534       PetscCall(VecRestoreArrayRead(b, &rarray));
1535     } else if (!mumps->myid) {
1536       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1537     }
1538   } else PetscCall(VecRestoreArray(x, &array));
1539 
1540   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1541   PetscFunctionReturn(PETSC_SUCCESS);
1542 }
1543 
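/* Solve A^T x = b by temporarily setting ICNTL(9) = 0 (MUMPS solves with the transpose whenever ICNTL(9) != 1) and
   reusing MatSolve_MUMPS(); the previous ICNTL(9) value is restored afterwards. */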
1544 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1545 {
1546   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1547   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1548 
1549   PetscFunctionBegin;
1550   mumps->id.ICNTL(9) = 0;
1551   PetscCall(MatSolve_MUMPS(A, b, x));
1552   mumps->id.ICNTL(9) = value;
1553   PetscFunctionReturn(PETSC_SUCCESS);
1554 }
1555 
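/* Solve A X = B for a dense X. B is either dense, or a MATTRANSPOSEVIRTUAL wrapper around an AIJ matrix holding B^T
   (MUMPS wants sparse RHS in compressed column storage, see MatMatTransposeSolve_MUMPS()). On one rank the solve is
   done in place in X; in parallel the RHS is centralized or distributed according to ICNTL(20), and the distributed
   MUMPS solution is scattered back into X column by column. */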
1556 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1557 {
1558   Mat                Bt = NULL;
1559   PetscBool          denseX, denseB, flg, flgT;
1560   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1561   PetscInt           i, nrhs, M;
1562   PetscScalar       *array;
1563   const PetscScalar *rbray;
1564   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1565   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1566   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1567   IS                 is_to, is_from;
1568   PetscInt           k, proc, j, m, myrstart;
1569   const PetscInt    *rstart;
1570   Vec                v_mpi, msol_loc;
1571   VecScatter         scat_sol;
1572   Vec                b_seq;
1573   VecScatter         scat_rhs;
1574   PetscScalar       *aa;
1575   PetscInt           spnr, *ia, *ja;
1576   Mat_MPIAIJ        *b = NULL;
1577 
1578   PetscFunctionBegin;
1579   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1580   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1581 
1582   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1583   if (denseB) {
1584     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1585     mumps->id.ICNTL(20) = 0; /* dense RHS */
1586   } else {                   /* sparse B */
1587     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1588     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1589     if (flgT) { /* input B is transpose of actual RHS matrix,
1590                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1591       PetscCall(MatTransposeGetMat(B, &Bt));
1592     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1593     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1594   }
1595 
1596   PetscCall(MatGetSize(B, &M, &nrhs));
1597   mumps->id.nrhs = nrhs;
1598   mumps->id.lrhs = M;
1599   mumps->id.rhs  = NULL;
1600 
1601   if (mumps->petsc_size == 1) {
1602     PetscScalar *aa;
1603     PetscInt     spnr, *ia, *ja;
1604     PetscBool    second_solve = PETSC_FALSE;
1605 
1606     PetscCall(MatDenseGetArray(X, &array));
1607     mumps->id.rhs = (MumpsScalar *)array;
1608 
1609     if (denseB) {
1610       /* copy B to X */
1611       PetscCall(MatDenseGetArrayRead(B, &rbray));
1612       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1613       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1614     } else { /* sparse B */
1615       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1616       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1617       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1618       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1619       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1620     }
1621     /* handle condensation step of Schur complement (if any) */
1622     if (mumps->id.size_schur > 0) {
1623       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1624         second_solve = PETSC_TRUE;
1625         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1626         mumps->id.ICNTL(26) = 1; /* condensation phase */
1627       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1628     }
1629     /* solve phase */
1630     mumps->id.job = JOB_SOLVE;
1631     PetscMUMPS_c(mumps);
1632     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1633 
1634     /* handle expansion step of Schur complement (if any) */
1635     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1636     else if (mumps->id.ICNTL(26) == 1) {
1637       PetscCall(MatMumpsSolveSchur_Private(A));
1638       for (j = 0; j < nrhs; ++j)
1639         for (i = 0; i < mumps->id.size_schur; ++i) {
1640 #if !defined(PETSC_USE_COMPLEX)
1641           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1642 #else
1643           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1644 #endif
1645           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1646         }
1647     }
1648     if (!denseB) { /* sparse B */
1649       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1650       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1651       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1652     }
1653     PetscCall(MatDenseRestoreArray(X, &array));
1654     PetscFunctionReturn(PETSC_SUCCESS);
1655   }
1656 
1657   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1658   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1659 
1660   /* create msol_loc to hold mumps local solution */
1661   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1662   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1663 
1664   lsol_loc  = mumps->id.lsol_loc;
1665   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1666   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1667   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1668   mumps->id.isol_loc = isol_loc;
1669 
1670   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1671 
1672   if (denseB) {
1673     if (mumps->ICNTL20 == 10) {
1674       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1675       PetscCall(MatDenseGetArrayRead(B, &rbray));
1676       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1677       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1678       PetscCall(MatGetLocalSize(B, &m, NULL));
1679       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1680     } else {
1681       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1682       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created here cannot be implemented as an
1683         MPI_Gather, resulting in very inefficient communication. An optimization would be to use VecScatterCreateToZero to
1684         gather B onto rank 0 and then, on rank 0, re-arrange B into the desired order, which is a local operation.
1685       */
1686 
1687       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1688       /* wrap dense rhs matrix B into a vector v_mpi */
1689       PetscCall(MatGetLocalSize(B, &m, NULL));
1690       PetscCall(MatDenseGetArray(B, &bray));
1691       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1692       PetscCall(MatDenseRestoreArray(B, &bray));
1693 
1694       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1695       if (!mumps->myid) {
1696         PetscInt *idx;
1697         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1698         PetscCall(PetscMalloc1(nrhs * M, &idx));
1699         PetscCall(MatGetOwnershipRanges(B, &rstart));
1700         k = 0;
1701         for (proc = 0; proc < mumps->petsc_size; proc++) {
1702           for (j = 0; j < nrhs; j++) {
1703             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1704           }
1705         }
1706 
1707         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1708         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1709         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1710       } else {
1711         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1712         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1713         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1714       }
1715       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1716       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1717       PetscCall(ISDestroy(&is_to));
1718       PetscCall(ISDestroy(&is_from));
1719       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1720 
1721       if (!mumps->myid) { /* define rhs on the host */
1722         PetscCall(VecGetArray(b_seq, &bray));
1723         mumps->id.rhs = (MumpsScalar *)bray;
1724         PetscCall(VecRestoreArray(b_seq, &bray));
1725       }
1726     }
1727   } else { /* sparse B */
1728     b = (Mat_MPIAIJ *)Bt->data;
1729 
1730     /* wrap dense X into a vector v_mpi */
1731     PetscCall(MatGetLocalSize(X, &m, NULL));
1732     PetscCall(MatDenseGetArray(X, &bray));
1733     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1734     PetscCall(MatDenseRestoreArray(X, &bray));
1735 
1736     if (!mumps->myid) {
1737       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1738       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1739       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1740       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1741       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1742     } else {
1743       mumps->id.irhs_ptr    = NULL;
1744       mumps->id.irhs_sparse = NULL;
1745       mumps->id.nz_rhs      = 0;
1746       mumps->id.rhs_sparse  = NULL;
1747     }
1748   }
1749 
1750   /* solve phase */
1751   mumps->id.job = JOB_SOLVE;
1752   PetscMUMPS_c(mumps);
1753   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1754 
1755   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1756   PetscCall(MatDenseGetArray(X, &array));
1757   PetscCall(VecPlaceArray(v_mpi, array));
1758 
1759   /* create scatter scat_sol */
1760   PetscCall(MatGetOwnershipRanges(X, &rstart));
1761   /* iidx: index for scatter mumps solution to petsc X */
1762 
1763   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1764   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1765   for (i = 0; i < lsol_loc; i++) {
1766     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1767 
1768     for (proc = 0; proc < mumps->petsc_size; proc++) {
1769       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1770         myrstart = rstart[proc];
1771         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1772         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1773         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1774         break;
1775       }
1776     }
1777 
1778     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1779   }
1780   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1781   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1782   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1783   PetscCall(ISDestroy(&is_from));
1784   PetscCall(ISDestroy(&is_to));
1785   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1786   PetscCall(MatDenseRestoreArray(X, &array));
1787 
1788   /* free spaces */
1789   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1790   mumps->id.isol_loc = isol_loc_save;
1791 
1792   PetscCall(PetscFree2(sol_loc, isol_loc));
1793   PetscCall(PetscFree(idxx));
1794   PetscCall(VecDestroy(&msol_loc));
1795   PetscCall(VecDestroy(&v_mpi));
1796   if (!denseB) {
1797     if (!mumps->myid) {
1798       b = (Mat_MPIAIJ *)Bt->data;
1799       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1800       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1801       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1802     }
1803   } else {
1804     if (mumps->ICNTL20 == 0) {
1805       PetscCall(VecDestroy(&b_seq));
1806       PetscCall(VecScatterDestroy(&scat_rhs));
1807     }
1808   }
1809   PetscCall(VecScatterDestroy(&scat_sol));
1810   PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1811   PetscFunctionReturn(PETSC_SUCCESS);
1812 }
1813 
1814 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1815 {
1816   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1817   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1818 
1819   PetscFunctionBegin;
1820   mumps->id.ICNTL(9) = 0;
1821   PetscCall(MatMatSolve_MUMPS(A, B, X));
1822   mumps->id.ICNTL(9) = value;
1823   PetscFunctionReturn(PETSC_SUCCESS);
1824 }
1825 
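/* Solve A X = B where the caller supplies Bt = B^T as an AIJ matrix; the MATTRANSPOSEVIRTUAL wrapper created below
   lets MatMatSolve_MUMPS() access the rows of B^T as the columns of B, i.e. the compressed column storage MUMPS needs
   for a sparse RHS. A sketch of the assumed user-level call (error checking omitted):

     Mat F;   // MUMPS factor of A
     Mat Bt;  // MATAIJ matrix holding B^T
     Mat X;   // MATDENSE result
     MatMatTransposeSolve(F, Bt, X);
*/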
1826 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1827 {
1828   PetscBool flg;
1829   Mat       B;
1830 
1831   PetscFunctionBegin;
1832   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1833   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1834 
1835   /* Create B=Bt^T that uses Bt's data structure */
1836   PetscCall(MatCreateTranspose(Bt, &B));
1837 
1838   PetscCall(MatMatSolve_MUMPS(A, B, X));
1839   PetscCall(MatDestroy(&B));
1840   PetscFunctionReturn(PETSC_SUCCESS);
1841 }
1842 
1843 #if !defined(PETSC_USE_COMPLEX)
1844 /*
1845   input:
1846    F:        numeric factor
1847   output:
1848    nneg:     total number of negative pivots
1849    nzero:    total number of zero pivots
1850    npos:     (global dimension of F) - nneg - nzero
1851 */
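/* A sketch of the options assumed to be needed for a correct parallel inertia computation (see the checks below);
   this routine is reached through MatGetInertia() on a MUMPS Cholesky factor F:

     // run with: -pc_type cholesky -pc_factor_mat_solver_type mumps -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1
     PetscInt nneg, nzero, npos;
     MatGetInertia(F, &nneg, &nzero, &npos);
*/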
1852 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1853 {
1854   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1855   PetscMPIInt size;
1856 
1857   PetscFunctionBegin;
1858   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1859   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1860   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1861 
1862   if (nneg) *nneg = mumps->id.INFOG(12);
1863   if (nzero || npos) {
1864     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1865     if (nzero) *nzero = mumps->id.INFOG(28);
1866     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1867   }
1868   PetscFunctionReturn(PETSC_SUCCESS);
1869 }
1870 #endif
1871 
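/* When several PETSc MPI ranks are folded into one MUMPS rank (omp_comm_size > 1), gather the COO triples
   irn/jcn/val of the other ranks onto the OMP master. MAT_INITIAL_MATRIX gathers indices and values and enlarges the
   master's arrays; MAT_REUSE_MATRIX only refreshes the values. Messages are sliced into chunks of at most
   PETSC_MPI_INT_MAX entries since the counts may exceed 2^31-1. */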
1872 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1873 {
1874   PetscInt       i, nreqs;
1875   PetscMUMPSInt *irn, *jcn;
1876   PetscMPIInt    count;
1877   PetscInt64     totnnz, remain;
1878   const PetscInt osize = mumps->omp_comm_size;
1879   PetscScalar   *val;
1880 
1881   PetscFunctionBegin;
1882   if (osize > 1) {
1883     if (reuse == MAT_INITIAL_MATRIX) {
1884       /* master first gathers counts of nonzeros to receive */
1885       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1886       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1887 
1888       /* Then each computes number of send/recvs */
1889       if (mumps->is_omp_master) {
1890         /* Start from 1 since self communication is not done in MPI */
1891         nreqs = 0;
1892         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1893       } else {
1894         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1895       }
1896       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1897 
1898       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1899          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1900          might be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
1901          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1902        */
1903       nreqs = 0; /* counter for actual send/recvs */
1904       if (mumps->is_omp_master) {
1905         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1906         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1907         PetscCall(PetscMalloc1(totnnz, &val));
1908 
1909         /* Self communication */
1910         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1911         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1912         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1913 
1914         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1915         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1916         PetscCall(PetscFree(mumps->val_alloc));
1917         mumps->nnz = totnnz;
1918         mumps->irn = irn;
1919         mumps->jcn = jcn;
1920         mumps->val = mumps->val_alloc = val;
1921 
1922         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1923         jcn += mumps->recvcount[0];
1924         val += mumps->recvcount[0];
1925 
1926         /* Remote communication */
1927         for (i = 1; i < osize; i++) {
1928           count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
1929           remain = mumps->recvcount[i] - count;
1930           while (count > 0) {
1931             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1932             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1933             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1934             irn += count;
1935             jcn += count;
1936             val += count;
1937             count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1938             remain -= count;
1939           }
1940         }
1941       } else {
1942         irn    = mumps->irn;
1943         jcn    = mumps->jcn;
1944         val    = mumps->val;
1945         count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
1946         remain = mumps->nnz - count;
1947         while (count > 0) {
1948           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1949           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1950           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1951           irn += count;
1952           jcn += count;
1953           val += count;
1954           count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1955           remain -= count;
1956         }
1957       }
1958     } else {
1959       nreqs = 0;
1960       if (mumps->is_omp_master) {
1961         val = mumps->val + mumps->recvcount[0];
1962         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1963           count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
1964           remain = mumps->recvcount[i] - count;
1965           while (count > 0) {
1966             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1967             val += count;
1968             count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1969             remain -= count;
1970           }
1971         }
1972       } else {
1973         val    = mumps->val;
1974         count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
1975         remain = mumps->nnz - count;
1976         while (count > 0) {
1977           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1978           val += count;
1979           count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
1980           remain -= count;
1981         }
1982       }
1983     }
1984     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1985     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1986   }
1987   PetscFunctionReturn(PETSC_SUCCESS);
1988 }
1989 
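/* Numeric factorization: refresh the COO values from A (MAT_REUSE_MATRIX), regather them on the OMP master if needed,
   run MUMPS with JOB_FACTNUMERIC, translate negative INFOG(1) codes into F->factorerrortype, and, in parallel runs,
   (re)create the sequential vector x_seq that holds the distributed MUMPS solution used by MatSolve_MUMPS(). */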
1990 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
1991 {
1992   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1993   PetscBool  isMPIAIJ;
1994 
1995   PetscFunctionBegin;
1996   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip the early return for the empty-matrix case (INFOG(1) = -16 with N = 0) */
1997     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1998     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1999     PetscFunctionReturn(PETSC_SUCCESS);
2000   }
2001 
2002   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2003   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2004 
2005   /* numerical factorization phase */
2006   mumps->id.job = JOB_FACTNUMERIC;
2007   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2008     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2009   } else {
2010     mumps->id.a_loc = (MumpsScalar *)mumps->val;
2011   }
2012   PetscMUMPS_c(mumps);
2013   if (mumps->id.INFOG(1) < 0) {
2014     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2015     if (mumps->id.INFOG(1) == -10) {
2016       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2017       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2018     } else if (mumps->id.INFOG(1) == -13) {
2019       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2020       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2021     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2022       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2023       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2024     } else {
2025       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2026       F->factorerrortype = MAT_FACTOR_OTHER;
2027     }
2028   }
2029   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2030 
2031   F->assembled = PETSC_TRUE;
2032 
2033   if (F->schur) { /* reset Schur status to unfactored */
2034 #if defined(PETSC_HAVE_CUDA)
2035     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2036 #endif
2037     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2038       mumps->id.ICNTL(19) = 2;
2039       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2040     }
2041     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2042   }
2043 
2044   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
2045   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2046 
2047   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2048   if (mumps->petsc_size > 1) {
2049     PetscInt     lsol_loc;
2050     PetscScalar *sol_loc;
2051 
2052     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2053 
2054     /* distributed solution; Create x_seq=sol_loc for repeated use */
2055     if (mumps->x_seq) {
2056       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2057       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2058       PetscCall(VecDestroy(&mumps->x_seq));
2059     }
2060     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2061     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2062     mumps->id.lsol_loc = lsol_loc;
2063     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2064     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2065   }
2066   PetscCall(PetscLogFlops((double)mumps->id.RINFO(2)));
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 /* Sets MUMPS options from the options database */
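/* A few representative command-line settings handled below (the values shown are only examples):
     -mat_mumps_icntl_14 50      increase the estimated working space by 50 percent
     -mat_mumps_icntl_24 1       detect null pivot rows (also forces ICNTL(13)=1 below)
     -mat_mumps_cntl_1 0.01      relative pivoting threshold
     -mat_mumps_use_omp_threads  fold MPI processes into OpenMP threads (requires PETSc OpenMP support) */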
2071 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2072 {
2073   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2074   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2075   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2076   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2077   MumpsScalar  *arr;
2078 
2079   PetscFunctionBegin;
2080   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2081   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2082     PetscInt nthreads   = 0;
2083     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2084     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2085 
2086     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2087     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2088     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2089 
2090     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2091     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2092     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2093     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2094     if (mumps->use_petsc_omp_support) {
2095       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2096                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2097       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2098 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2099       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2100       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2101 #endif
2102     } else {
2103       mumps->omp_comm      = PETSC_COMM_SELF;
2104       mumps->mumps_comm    = mumps->petsc_comm;
2105       mumps->is_omp_master = PETSC_TRUE;
2106     }
2107     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2108     mumps->reqs = NULL;
2109     mumps->tag  = 0;
2110 
2111     if (mumps->mumps_comm != MPI_COMM_NULL) {
2112       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2113         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2114         MPI_Comm comm;
2115         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2116         mumps->mumps_comm = comm;
2117       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2118     }
2119 
2120     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2121     mumps->id.job          = JOB_INIT;
2122     mumps->id.par          = 1; /* host participates in factorization and solve */
2123     mumps->id.sym          = mumps->sym;
2124 
2125     size          = mumps->id.size_schur;
2126     arr           = mumps->id.schur;
2127     listvar_schur = mumps->id.listvar_schur;
2128     PetscMUMPS_c(mumps);
2129     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2130 
2131     /* set PETSc-MUMPS default options - override MUMPS default */
2132     mumps->id.ICNTL(3) = 0;
2133     mumps->id.ICNTL(4) = 0;
2134     if (mumps->petsc_size == 1) {
2135       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2136       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2137     } else {
2138       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2139       mumps->id.ICNTL(21) = 1; /* distributed solution */
2140     }
2141 
2142     /* restore cached ICNTL and CNTL values */
2143     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2144     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2145     PetscCall(PetscFree(mumps->ICNTL_pre));
2146     PetscCall(PetscFree(mumps->CNTL_pre));
2147 
2148     if (schur) {
2149       mumps->id.size_schur    = size;
2150       mumps->id.schur_lld     = size;
2151       mumps->id.schur         = arr;
2152       mumps->id.listvar_schur = listvar_schur;
2153       if (mumps->petsc_size > 1) {
2154         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2155 
2156         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2157         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2158         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2159         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2160       } else {
2161         if (F->factortype == MAT_FACTOR_LU) {
2162           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2163         } else {
2164           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2165         }
2166       }
2167       mumps->id.ICNTL(26) = -1;
2168     }
2169 
2170     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2171        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2172      */
2173     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2174     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2175 
2176     mumps->scat_rhs = NULL;
2177     mumps->scat_sol = NULL;
2178   }
2179   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2180   if (flg) mumps->id.ICNTL(1) = icntl;
2181   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2182   if (flg) mumps->id.ICNTL(2) = icntl;
2183   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2184   if (flg) mumps->id.ICNTL(3) = icntl;
2185 
2186   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2187   if (flg) mumps->id.ICNTL(4) = icntl;
2188   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
2189 
2190   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2191   if (flg) mumps->id.ICNTL(6) = icntl;
2192 
2193   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2194   if (flg) {
2195     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2196     mumps->id.ICNTL(7) = icntl;
2197   }
2198 
2199   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2200   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2201   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2202   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2203   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2204   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2205   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2206   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2207   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2208   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2209   if (flg) {
2210     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2211     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2212   }
2213   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2214   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2215     PetscCall(MatDestroy(&F->schur));
2216     PetscCall(MatMumpsResetSchur_Private(mumps));
2217   }
2218 
2219   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2220      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2221      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2222      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2223      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2224      In short, we could not use distributed RHS with MPICH until MPICH v4.0b1, unless the workaround available in MUMPS 5.6.2+ is enabled.
2225    */
2226 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE)
2227   mumps->ICNTL20 = 10;
2228 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH) && (MPICH_NUMVERSION < 40000101))
2229   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
2230 #else
2231   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
2232 #endif
2233   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2234   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2235 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2236   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2237 #endif
2238   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2239 
2240   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2241   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2242   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2243   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
2244 
2245   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2246   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2247   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2248   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2249   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2250   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2251   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2252   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2253   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2254   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2255   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2256   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2257   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2258   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2259 
2260   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2261   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2262   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2263   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2264   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2265   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2266 
2267   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2268 
2269   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2270   if (ninfo) {
2271     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2272     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2273     mumps->ninfo = ninfo;
2274     for (i = 0; i < ninfo; i++) {
2275       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2276       mumps->info[i] = info[i];
2277     }
2278   }
2279   PetscOptionsEnd();
2280   PetscFunctionReturn(PETSC_SUCCESS);
2281 }
2282 
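/*
   Illustrative note (a hedged sketch, not part of this file's logic): the ICNTL/CNTL options registered
   above are read from the PETSc options database when the symbolic factorization calls
   MatSetFromOptions_MUMPS(). Assuming an application wants to set them programmatically before the
   factorization, one possibility is

     PetscCall(PetscOptionsSetValue(NULL, "-mat_mumps_icntl_14", "30"));  // percentage workspace increase
     PetscCall(PetscOptionsSetValue(NULL, "-mat_mumps_cntl_1", "0.01"));  // relative pivoting threshold
     PetscCall(PetscOptionsSetValue(NULL, "-mat_mumps_view_info", "15,16"));

   The option values are examples only, and the option names may need the solver's options prefix when the
   factorization is driven through a prefixed KSP/PC; MatMumpsSetIcntl()/MatMumpsSetCntl() provide the
   same control through the API.
*/
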
2283 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2284 {
2285   PetscFunctionBegin;
2286   if (mumps->id.INFOG(1) < 0) {
2287     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2288     if (mumps->id.INFOG(1) == -6) {
2289       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2290       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2291     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2292       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2293       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2294     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* error -16: N out of range; INFO(2) holds the offending N, so 0 means an empty matrix */
2295       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2296     } else {
2297       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2298       F->factorerrortype = MAT_FACTOR_OTHER;
2299     }
2300   }
2301   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2302   PetscFunctionReturn(PETSC_SUCCESS);
2303 }
2304 
2305 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2306 {
2307   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2308   Vec            b;
2309   const PetscInt M = A->rmap->N;
2310 
2311   PetscFunctionBegin;
2312   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2313     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2314     PetscFunctionReturn(PETSC_SUCCESS);
2315   }
2316 
2317   /* Set MUMPS options from the options database */
2318   PetscCall(MatSetFromOptions_MUMPS(F, A));
2319 
2320   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2321   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2322 
2323   /* analysis phase */
2324   mumps->id.job = JOB_FACTSYMBOLIC;
2325   mumps->id.n   = M;
2326   switch (mumps->id.ICNTL(18)) {
2327   case 0: /* centralized assembled matrix input */
2328     if (!mumps->myid) {
2329       mumps->id.nnz = mumps->nnz;
2330       mumps->id.irn = mumps->irn;
2331       mumps->id.jcn = mumps->jcn;
2332       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2333       if (r && mumps->id.ICNTL(7) == 7) {
2334         mumps->id.ICNTL(7) = 1;
2335         if (!mumps->myid) {
2336           const PetscInt *idx;
2337           PetscInt        i;
2338 
2339           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2340           PetscCall(ISGetIndices(r, &idx));
2341           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2342           PetscCall(ISRestoreIndices(r, &idx));
2343         }
2344       }
2345     }
2346     break;
2347   case 3: /* distributed assembled matrix input (size>1) */
2348     mumps->id.nnz_loc = mumps->nnz;
2349     mumps->id.irn_loc = mumps->irn;
2350     mumps->id.jcn_loc = mumps->jcn;
2351     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2352     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2353       PetscCall(MatCreateVecs(A, NULL, &b));
2354       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2355       PetscCall(VecDestroy(&b));
2356     }
2357     break;
2358   }
2359   PetscMUMPS_c(mumps);
2360   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2361 
2362   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2363   F->ops->solve             = MatSolve_MUMPS;
2364   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2365   F->ops->matsolve          = MatMatSolve_MUMPS;
2366   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2367   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2368 
2369   mumps->matstruc = SAME_NONZERO_PATTERN;
2370   PetscFunctionReturn(PETSC_SUCCESS);
2371 }
2372 
2373 /* Note the Petsc r and c permutations are ignored */
2374 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2375 {
2376   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2377   Vec            b;
2378   const PetscInt M = A->rmap->N;
2379 
2380   PetscFunctionBegin;
2381   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2382     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2383     PetscFunctionReturn(PETSC_SUCCESS);
2384   }
2385 
2386   /* Set MUMPS options from the options database */
2387   PetscCall(MatSetFromOptions_MUMPS(F, A));
2388 
2389   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2390   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2391 
2392   /* analysis phase */
2393   mumps->id.job = JOB_FACTSYMBOLIC;
2394   mumps->id.n   = M;
2395   switch (mumps->id.ICNTL(18)) {
2396   case 0: /* centralized assembled matrix input */
2397     if (!mumps->myid) {
2398       mumps->id.nnz = mumps->nnz;
2399       mumps->id.irn = mumps->irn;
2400       mumps->id.jcn = mumps->jcn;
2401       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2402     }
2403     break;
2404   case 3: /* distributed assembled matrix input (size>1) */
2405     mumps->id.nnz_loc = mumps->nnz;
2406     mumps->id.irn_loc = mumps->irn;
2407     mumps->id.jcn_loc = mumps->jcn;
2408     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2409     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2410       PetscCall(MatCreateVecs(A, NULL, &b));
2411       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2412       PetscCall(VecDestroy(&b));
2413     }
2414     break;
2415   }
2416   PetscMUMPS_c(mumps);
2417   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2418 
2419   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2420   F->ops->solve             = MatSolve_MUMPS;
2421   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2422   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2423 
2424   mumps->matstruc = SAME_NONZERO_PATTERN;
2425   PetscFunctionReturn(PETSC_SUCCESS);
2426 }
2427 
2428 /* Note the Petsc r permutation and factor info are ignored */
2429 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
2430 {
2431   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2432   Vec            b;
2433   const PetscInt M = A->rmap->N;
2434 
2435   PetscFunctionBegin;
2436   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2437     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2438     PetscFunctionReturn(PETSC_SUCCESS);
2439   }
2440 
2441   /* Set MUMPS options from the options database */
2442   PetscCall(MatSetFromOptions_MUMPS(F, A));
2443 
2444   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2445   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2446 
2447   /* analysis phase */
2448   mumps->id.job = JOB_FACTSYMBOLIC;
2449   mumps->id.n   = M;
2450   switch (mumps->id.ICNTL(18)) {
2451   case 0: /* centralized assembled matrix input */
2452     if (!mumps->myid) {
2453       mumps->id.nnz = mumps->nnz;
2454       mumps->id.irn = mumps->irn;
2455       mumps->id.jcn = mumps->jcn;
2456       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2457     }
2458     break;
2459   case 3: /* distributed assembled matrix input (size>1) */
2460     mumps->id.nnz_loc = mumps->nnz;
2461     mumps->id.irn_loc = mumps->irn;
2462     mumps->id.jcn_loc = mumps->jcn;
2463     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2464     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2465       PetscCall(MatCreateVecs(A, NULL, &b));
2466       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2467       PetscCall(VecDestroy(&b));
2468     }
2469     break;
2470   }
2471   PetscMUMPS_c(mumps);
2472   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2473 
2474   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2475   F->ops->solve                 = MatSolve_MUMPS;
2476   F->ops->solvetranspose        = MatSolve_MUMPS;
2477   F->ops->matsolve              = MatMatSolve_MUMPS;
2478   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2479   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2480 #if defined(PETSC_USE_COMPLEX)
2481   F->ops->getinertia = NULL;
2482 #else
2483   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2484 #endif
2485 
2486   mumps->matstruc = SAME_NONZERO_PATTERN;
2487   PetscFunctionReturn(PETSC_SUCCESS);
2488 }
2489 
2490 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2491 {
2492   PetscBool         iascii;
2493   PetscViewerFormat format;
2494   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2495 
2496   PetscFunctionBegin;
2497   /* check if matrix is mumps type */
2498   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2499 
2500   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2501   if (iascii) {
2502     PetscCall(PetscViewerGetFormat(viewer, &format));
2503     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2504       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2505       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2506         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2507         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2508         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2509         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2510         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2511         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2512         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2513         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2514         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering): %d\n", mumps->id.ICNTL(7)));
2515         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2516         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2517         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2518         if (mumps->id.ICNTL(11) > 0) {
2519           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)mumps->id.RINFOG(4)));
2520           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)mumps->id.RINFOG(5)));
2521           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)mumps->id.RINFOG(6)));
2522           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
2523           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)mumps->id.RINFOG(9)));
2524           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
2525         }
2526         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2527         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2528         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2530         /* ICNTL(16-17) not used */
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2533         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2534         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2535         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2537 
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2540         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2541         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2543         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2544 
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (which factors may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2551         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2553 
2554         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)mumps->id.CNTL(1)));
2555         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
2556         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)mumps->id.CNTL(3)));
2557         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)mumps->id.CNTL(4)));
2558         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)mumps->id.CNTL(5)));
2559         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)mumps->id.CNTL(7)));
2560 
2561         /* information local to each processor */
2562         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2563         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2564         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
2565         PetscCall(PetscViewerFlush(viewer));
2566         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2567         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
2568         PetscCall(PetscViewerFlush(viewer));
2569         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2570         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
2571         PetscCall(PetscViewerFlush(viewer));
2572 
2573         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2574         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2575         PetscCall(PetscViewerFlush(viewer));
2576 
2577         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2578         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2579         PetscCall(PetscViewerFlush(viewer));
2580 
2581         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2582         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2583         PetscCall(PetscViewerFlush(viewer));
2584 
2585         if (mumps->ninfo && mumps->ninfo <= 80) {
2586           PetscInt i;
2587           for (i = 0; i < mumps->ninfo; i++) {
2588             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2589             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2590             PetscCall(PetscViewerFlush(viewer));
2591           }
2592         }
2593         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2594       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2595 
2596       if (mumps->myid == 0) { /* information from the host */
2597         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
2598         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
2599         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
2600         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2601 
2602         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2603         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2604         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2605         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2606         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2607         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2608         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2609         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2610         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2611         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2612         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2613         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2614         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2615         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2616         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2617         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2618         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2619         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2620         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2621         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2622         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2623         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2624         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2625         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2626         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2627         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2628         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2629         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2630         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2631         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2632         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2633         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2634         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2635         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2636       }
2637     }
2638   }
2639   PetscFunctionReturn(PETSC_SUCCESS);
2640 }
2641 
2642 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
2643 {
2644   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2645 
2646   PetscFunctionBegin;
2647   info->block_size        = 1.0;
2648   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2649   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2650   info->nz_unneeded       = 0.0;
2651   info->assemblies        = 0.0;
2652   info->mallocs           = 0.0;
2653   info->memory            = 0.0;
2654   info->fill_ratio_given  = 0;
2655   info->fill_ratio_needed = 0;
2656   info->factor_mallocs    = 0;
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2661 {
2662   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2663   const PetscScalar *arr;
2664   const PetscInt    *idxs;
2665   PetscInt           size, i;
2666 
2667   PetscFunctionBegin;
2668   PetscCall(ISGetLocalSize(is, &size));
2669   /* Schur complement matrix */
2670   PetscCall(MatDestroy(&F->schur));
2671   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2672   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2673   mumps->id.schur      = (MumpsScalar *)arr;
2674   mumps->id.size_schur = size;
2675   mumps->id.schur_lld  = size;
2676   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2677   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2678 
2679   /* MUMPS expects Fortran style indices */
2680   PetscCall(PetscFree(mumps->id.listvar_schur));
2681   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2682   PetscCall(ISGetIndices(is, &idxs));
2683   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
2684   PetscCall(ISRestoreIndices(is, &idxs));
2685   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2686   mumps->id.ICNTL(26) = -1;
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
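/*
   Illustrative usage sketch (hedged; the matrix `A` and the index set `is` are assumed to exist and
   MUMPS support to be configured): the Schur index set must be supplied before the symbolic
   factorization, e.g.

     Mat F;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));
     // ... then run the symbolic and numeric factorization as usual (directly or through KSP/PC) ...

   After the numeric factorization the dense Schur complement can be retrieved with
   MatFactorCreateSchurComplement(), implemented for MUMPS just below.
*/
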
2690 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2691 {
2692   Mat          St;
2693   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2694   PetscScalar *array;
2695 
2696   PetscFunctionBegin;
2697   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2698   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2699   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2700   PetscCall(MatSetType(St, MATDENSE));
2701   PetscCall(MatSetUp(St));
2702   PetscCall(MatDenseGetArray(St, &array));
2703   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2704     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2705       PetscInt i, j, N = mumps->id.size_schur;
2706       for (i = 0; i < N; i++) {
2707         for (j = 0; j < N; j++) {
2708 #if !defined(PETSC_USE_COMPLEX)
2709           PetscScalar val = mumps->id.schur[i * N + j];
2710 #else
2711           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2712 #endif
2713           array[j * N + i] = val;
2714         }
2715       }
2716     } else { /* stored by columns */
2717       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2718     }
2719   } else {                          /* either full or lower-triangular (not packed) */
2720     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2721       PetscInt i, j, N = mumps->id.size_schur;
2722       for (i = 0; i < N; i++) {
2723         for (j = i; j < N; j++) {
2724 #if !defined(PETSC_USE_COMPLEX)
2725           PetscScalar val = mumps->id.schur[i * N + j];
2726 #else
2727           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2728 #endif
2729           array[i * N + j] = array[j * N + i] = val;
2730         }
2731       }
2732     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2733       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2734     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2735       PetscInt i, j, N = mumps->id.size_schur;
2736       for (i = 0; i < N; i++) {
2737         for (j = 0; j < i + 1; j++) {
2738 #if !defined(PETSC_USE_COMPLEX)
2739           PetscScalar val = mumps->id.schur[i * N + j];
2740 #else
2741           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2742 #endif
2743           array[i * N + j] = array[j * N + i] = val;
2744         }
2745       }
2746     }
2747   }
2748   PetscCall(MatDenseRestoreArray(St, &array));
2749   *S = St;
2750   PetscFunctionReturn(PETSC_SUCCESS);
2751 }
2752 
2753 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2754 {
2755   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2756 
2757   PetscFunctionBegin;
2758   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2759     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2760     for (i = 0; i < nICNTL_pre; ++i)
2761       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2762     if (i == nICNTL_pre) {                             /* not already cached */
2763       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2764       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2765       mumps->ICNTL_pre[0]++;
2766     }
2767     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2768     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2769   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2770   PetscFunctionReturn(PETSC_SUCCESS);
2771 }
2772 
2773 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2774 {
2775   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2776 
2777   PetscFunctionBegin;
2778   if (mumps->id.job == JOB_NULL) {
2779     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2780     *ival = 0;
2781     for (i = 0; i < nICNTL_pre; ++i) {
2782       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2783     }
2784   } else *ival = mumps->id.ICNTL(icntl);
2785   PetscFunctionReturn(PETSC_SUCCESS);
2786 }
2787 
2788 /*@
2789   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2790 
2791   Logically Collective
2792 
2793   Input Parameters:
2794 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2795 . icntl - index of MUMPS parameter array ICNTL()
2796 - ival  - value of MUMPS ICNTL(icntl)
2797 
2798   Options Database Key:
2799 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2800 
2801   Level: beginner
2802 
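  Example Usage:
  A minimal, hedged sketch; it assumes `A` is an assembled `MATAIJ` matrix and that MUMPS support is configured, and the value set is illustrative only.
.vb
  Mat F;
  PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
  PetscCall(MatMumpsSetIcntl(F, 14, 30)); // ICNTL(14): 30% estimated workspace increase
.ve
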
2803 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2804 @*/
2805 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2806 {
2807   PetscFunctionBegin;
2808   PetscValidType(F, 1);
2809   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2810   PetscValidLogicalCollectiveInt(F, icntl, 2);
2811   PetscValidLogicalCollectiveInt(F, ival, 3);
2812   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2813   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2814   PetscFunctionReturn(PETSC_SUCCESS);
2815 }
2816 
2817 /*@
2818   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2819 
2820   Logically Collective
2821 
2822   Input Parameters:
2823 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2824 - icntl - index of MUMPS parameter array ICNTL()
2825 
2826   Output Parameter:
2827 . ival - value of MUMPS ICNTL(icntl)
2828 
2829   Level: beginner
2830 
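  Example Usage:
  A hedged sketch; `F` is assumed to be a MUMPS factor matrix obtained with `MatGetFactor()`.
.vb
  PetscInt ival;
  PetscCall(MatMumpsGetIcntl(F, 7, &ival)); // read back the ICNTL(7) sequential ordering choice
.ve
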
2831 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2832 @*/
2833 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2834 {
2835   PetscFunctionBegin;
2836   PetscValidType(F, 1);
2837   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2838   PetscValidLogicalCollectiveInt(F, icntl, 2);
2839   PetscAssertPointer(ival, 3);
2840   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2841   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2842   PetscFunctionReturn(PETSC_SUCCESS);
2843 }
2844 
2845 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2846 {
2847   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2848 
2849   PetscFunctionBegin;
2850   if (mumps->id.job == JOB_NULL) {
2851     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2852     for (i = 0; i < nCNTL_pre; ++i)
2853       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2854     if (i == nCNTL_pre) {
2855       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2856       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2857       mumps->CNTL_pre[0]++;
2858     }
2859     mumps->CNTL_pre[1 + 2 * i] = icntl;
2860     mumps->CNTL_pre[2 + 2 * i] = val;
2861   } else mumps->id.CNTL(icntl) = val;
2862   PetscFunctionReturn(PETSC_SUCCESS);
2863 }
2864 
2865 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2866 {
2867   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2868 
2869   PetscFunctionBegin;
2870   if (mumps->id.job == JOB_NULL) {
2871     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2872     *val = 0.0;
2873     for (i = 0; i < nCNTL_pre; ++i) {
2874       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2875     }
2876   } else *val = mumps->id.CNTL(icntl);
2877   PetscFunctionReturn(PETSC_SUCCESS);
2878 }
2879 
2880 /*@
2881   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2882 
2883   Logically Collective
2884 
2885   Input Parameters:
2886 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2887 . icntl - index of MUMPS parameter array CNTL()
2888 - val   - value of MUMPS CNTL(icntl)
2889 
2890   Options Database Key:
2891 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2892 
2893   Level: beginner
2894 
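  Example Usage:
  A hedged sketch; `F` is assumed to be a MUMPS factor matrix obtained with `MatGetFactor()`, and the value is illustrative only.
.vb
  PetscCall(MatMumpsSetCntl(F, 1, 0.05)); // CNTL(1): relative pivoting threshold
.ve
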
2895 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2896 @*/
2897 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2898 {
2899   PetscFunctionBegin;
2900   PetscValidType(F, 1);
2901   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2902   PetscValidLogicalCollectiveInt(F, icntl, 2);
2903   PetscValidLogicalCollectiveReal(F, val, 3);
2904   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2905   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2906   PetscFunctionReturn(PETSC_SUCCESS);
2907 }
2908 
2909 /*@
2910   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2911 
2912   Logically Collective
2913 
2914   Input Parameters:
2915 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2916 - icntl - index of MUMPS parameter array CNTL()
2917 
2918   Output Parameter:
2919 . val - value of MUMPS CNTL(icntl)
2920 
2921   Level: beginner
2922 
2923 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2924 @*/
2925 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2926 {
2927   PetscFunctionBegin;
2928   PetscValidType(F, 1);
2929   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2930   PetscValidLogicalCollectiveInt(F, icntl, 2);
2931   PetscAssertPointer(val, 3);
2932   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2933   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2934   PetscFunctionReturn(PETSC_SUCCESS);
2935 }
2936 
2937 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2938 {
2939   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2940 
2941   PetscFunctionBegin;
2942   *info = mumps->id.INFO(icntl);
2943   PetscFunctionReturn(PETSC_SUCCESS);
2944 }
2945 
2946 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2947 {
2948   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2949 
2950   PetscFunctionBegin;
2951   *infog = mumps->id.INFOG(icntl);
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2956 {
2957   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2958 
2959   PetscFunctionBegin;
2960   *rinfo = mumps->id.RINFO(icntl);
2961   PetscFunctionReturn(PETSC_SUCCESS);
2962 }
2963 
2964 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2965 {
2966   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2967 
2968   PetscFunctionBegin;
2969   *rinfog = mumps->id.RINFOG(icntl);
2970   PetscFunctionReturn(PETSC_SUCCESS);
2971 }
2972 
2973 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2974 {
2975   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2976 
2977   PetscFunctionBegin;
2978   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2979   *size  = 0;
2980   *array = NULL;
2981   if (!mumps->myid) {
2982     *size = mumps->id.INFOG(28);
2983     PetscCall(PetscMalloc1(*size, array));
2984     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2985   }
2986   PetscFunctionReturn(PETSC_SUCCESS);
2987 }
2988 
2989 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2990 {
2991   Mat          Bt = NULL, Btseq = NULL;
2992   PetscBool    flg;
2993   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2994   PetscScalar *aa;
2995   PetscInt     spnr, *ia, *ja, M, nrhs;
2996 
2997   PetscFunctionBegin;
2998   PetscAssertPointer(spRHS, 2);
2999   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
3000   if (flg) {
3001     PetscCall(MatTransposeGetMat(spRHS, &Bt));
3002   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3003 
3004   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3005 
3006   if (mumps->petsc_size > 1) {
3007     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3008     Btseq         = b->A;
3009   } else {
3010     Btseq = Bt;
3011   }
3012 
3013   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3014   mumps->id.nrhs = nrhs;
3015   mumps->id.lrhs = M;
3016   mumps->id.rhs  = NULL;
3017 
3018   if (!mumps->myid) {
3019     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3020     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3021     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3022     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3023     mumps->id.rhs_sparse = (MumpsScalar *)aa;
3024   } else {
3025     mumps->id.irhs_ptr    = NULL;
3026     mumps->id.irhs_sparse = NULL;
3027     mumps->id.nz_rhs      = 0;
3028     mumps->id.rhs_sparse  = NULL;
3029   }
3030   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3031   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3032 
3033   /* solve phase */
3034   mumps->id.job = JOB_SOLVE;
3035   PetscMUMPS_c(mumps);
3036   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3037 
3038   if (!mumps->myid) {
3039     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3040     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3041     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3042   }
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
3045 
3046 /*@
3047   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3048 
3049   Logically Collective
3050 
3051   Input Parameter:
3052 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3053 
3054   Output Parameter:
3055 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the requested entries of the inverse of `A`
3056 
3057   Level: beginner
3058 
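  Example Usage:
  A hedged sketch; `F` is assumed to be a MUMPS factor of `A`, and `Bt` an assembled `MATAIJ` matrix whose nonzero pattern marks the requested entries of the inverse of `A`.
.vb
  Mat spRHS;
  PetscCall(MatCreateTranspose(Bt, &spRHS));
  PetscCall(MatMumpsGetInverse(F, spRHS)); // requested entries are returned in the values of Bt
  PetscCall(MatDestroy(&spRHS));
.ve
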
3059 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3060 @*/
3061 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3062 {
3063   PetscFunctionBegin;
3064   PetscValidType(F, 1);
3065   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3066   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3067   PetscFunctionReturn(PETSC_SUCCESS);
3068 }
3069 
3070 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3071 {
3072   Mat spRHS;
3073 
3074   PetscFunctionBegin;
3075   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3076   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3077   PetscCall(MatDestroy(&spRHS));
3078   PetscFunctionReturn(PETSC_SUCCESS);
3079 }
3080 
3081 /*@
3082   MatMumpsGetInverseTranspose - Get a user-specified set of entries of the inverse of the matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3083 
3084   Logically Collective
3085 
3086   Input Parameter:
3087 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3088 
3089   Output Parameter:
3090 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$
3091 
3092   Level: beginner
3093 
3094 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3095 @*/
3096 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3097 {
3098   PetscBool flg;
3099 
3100   PetscFunctionBegin;
3101   PetscValidType(F, 1);
3102   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3103   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3104   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix");
3105 
3106   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3107   PetscFunctionReturn(PETSC_SUCCESS);
3108 }
3109 
3110 /*@
3111   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3112 
3113   Logically Collective
3114 
3115   Input Parameters:
3116 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3117 - icntl - index of MUMPS parameter array INFO()
3118 
3119   Output Parameter:
3120 . ival - value of MUMPS INFO(icntl)
3121 
3122   Level: beginner
3123 
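  Example Usage:
  A hedged sketch; `F` is assumed to have completed its numeric factorization.
.vb
  PetscInt mem;
  PetscCall(MatMumpsGetInfo(F, 16, &mem)); // INFO(16): local memory (in MB) used during factorization
.ve
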
3124 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3125 @*/
3126 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3127 {
3128   PetscFunctionBegin;
3129   PetscValidType(F, 1);
3130   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3131   PetscAssertPointer(ival, 3);
3132   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 /*@
3137   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3138 
3139   Logically Collective
3140 
3141   Input Parameters:
3142 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3143 - icntl - index of MUMPS parameter array INFOG()
3144 
3145   Output Parameter:
3146 . ival - value of MUMPS INFOG(icntl)
3147 
3148   Level: beginner
3149 
3150 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3151 @*/
3152 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3153 {
3154   PetscFunctionBegin;
3155   PetscValidType(F, 1);
3156   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3157   PetscAssertPointer(ival, 3);
3158   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3159   PetscFunctionReturn(PETSC_SUCCESS);
3160 }
3161 
3162 /*@
3163   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3164 
3165   Logically Collective
3166 
3167   Input Parameters:
3168 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3169 - icntl - index of MUMPS parameter array RINFO()
3170 
3171   Output Parameter:
3172 . val - value of MUMPS RINFO(icntl)
3173 
3174   Level: beginner
3175 
3176 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3177 @*/
3178 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3179 {
3180   PetscFunctionBegin;
3181   PetscValidType(F, 1);
3182   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3183   PetscAssertPointer(val, 3);
3184   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3185   PetscFunctionReturn(PETSC_SUCCESS);
3186 }
3187 
3188 /*@
3189   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3190 
3191   Logically Collective
3192 
3193   Input Parameters:
3194 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3195 - icntl - index of MUMPS parameter array RINFOG()
3196 
3197   Output Parameter:
3198 . val - value of MUMPS RINFOG(icntl)
3199 
3200   Level: beginner
3201 
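  Example Usage:
  A hedged sketch for reading the determinant requested with `-mat_mumps_icntl_33` 1 (real case); `F` is assumed to have completed its numeric factorization.
.vb
  PetscReal mantissa;
  PetscInt  exponent;
  PetscCall(MatMumpsGetRinfog(F, 12, &mantissa));
  PetscCall(MatMumpsGetInfog(F, 34, &exponent));
  // determinant = mantissa * 2^exponent
.ve
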
3202 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3203 @*/
3204 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3205 {
3206   PetscFunctionBegin;
3207   PetscValidType(F, 1);
3208   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3209   PetscAssertPointer(val, 3);
3210   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3211   PetscFunctionReturn(PETSC_SUCCESS);
3212 }
3213 
3214 /*@
3215   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3216 
3217   Logically Collective
3218 
3219   Input Parameter:
3220 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3221 
3222   Output Parameters:
3223 + size  - local size of the array. The size of the array is non-zero only on the host.
3224 - array - array of rows with null pivots; the rows use 0-based indexing. The array is allocated within the function and the user is responsible
3225            for freeing it.
3226 
3227   Level: beginner
3228 
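  Example Usage:
  A hedged sketch; `F` is assumed to have been factored with `-mat_mumps_icntl_24` 1 (null pivot detection).
.vb
  PetscInt  nnull, *rows;
  PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows));
  // nnull is nonzero only on the host (rank 0); rows[] holds 0-based indices of the null-pivot rows
  PetscCall(PetscFree(rows));
.ve
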
3229 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3230 @*/
3231 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3232 {
3233   PetscFunctionBegin;
3234   PetscValidType(F, 1);
3235   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3236   PetscAssertPointer(size, 2);
3237   PetscAssertPointer(array, 3);
3238   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3239   PetscFunctionReturn(PETSC_SUCCESS);
3240 }
3241 
3242 /*MC
3243   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3244   distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3245 
3246   Works with `MATAIJ` and `MATSBAIJ` matrices
3247 
3248   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3249 
3250   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3251   See details below.
3252 
3253   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3254 
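  For example, an illustrative run (the executable name, rank count, and option values are placeholders only) is
.vb
  mpiexec -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30
.ve
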
3255   Options Database Keys:
3256 +  -mat_mumps_icntl_1   - ICNTL(1): output stream for error messages
3257 .  -mat_mumps_icntl_2   - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3258 .  -mat_mumps_icntl_3   - ICNTL(3): output stream for global information, collected on the host
3259 .  -mat_mumps_icntl_4   - ICNTL(4): level of printing (0 to 4)
3260 .  -mat_mumps_icntl_6   - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
3261 .  -mat_mumps_icntl_7   - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3262                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3263 .  -mat_mumps_icntl_8   - ICNTL(8): scaling strategy (-2 to 8 or 77)
3264 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3265 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3266 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3267 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3268 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3269 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3270 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3271 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3272 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3273 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
3274 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3275 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3276 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3277 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3278 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3279 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3280 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3281 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3282 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3283 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3284 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3285 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3286 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3287 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3288 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3289 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3290 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3291 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3292 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3293 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3294                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3295 
3296   Level: beginner
3297 
3298   Notes:
3299   MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so attempting to use
3300   it on a Hermitian matrix generates an error.
3301 
3302   When used within a `KSP`/`PC` solve, the options are prefixed with the options prefix of the `PC`. Otherwise one can set the options prefix by calling
3303   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
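
  For example, a minimal sketch of the second approach (the prefix "myf_" is illustrative):
.vb
          Mat F;
          MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
          MatSetOptionsPrefix(F, "myf_");  /* MUMPS options for F are then read with this prefix, e.g. -myf_mat_mumps_icntl_14 */
.ve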
3304 
3305   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3306   the failure with
3307 .vb
3308           KSPGetPC(ksp,&pc);
3309           PCFactorGetMatrix(pc,&mat);
3310           MatMumpsGetInfo(mat,....);
3311           MatMumpsGetInfog(mat,....); etc.
3312 .ve
3313     Alternatively, run with `-ksp_error_if_not_converged`, and the program will stop with the MUMPS information printed in the error message.
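
  As a more complete sketch (assuming `MatMumpsGetInfog()` returns the value in a `PetscInt`; INFOG(1) is MUMPS' main error code):
.vb
          KSPConvergedReason reason;
          PetscInt           infog1;
          KSPGetConvergedReason(ksp, &reason);
          if (reason == KSP_DIVERGED_PC_FAILED) {
            KSPGetPC(ksp, &pc);
            PCFactorGetMatrix(pc, &mat);
            MatMumpsGetInfog(mat, 1, &infog1); /* a negative INFOG(1) identifies the MUMPS failure, see the MUMPS users manual */
          }
.ve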
3314 
3315   MUMPS provides 64-bit integer support in two build modes:
3316   full 64-bit: MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3317   requires all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) to be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3318 
3319   selective 64-bit: with the default MUMPS build, 64-bit integers are used only where needed. In the compressed sparse row (CSR) storage of matrices,
3320   MUMPS stores column indices in 32-bit but row offsets in 64-bit, so you can have a huge number of nonzeros but must have fewer than 2^31 rows and
3321   columns. This can lead to significant memory and performance gains relative to a full 64-bit MUMPS build. It requires a regular (32-bit
3322   integer) build of all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS).
3323 
3324   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both the --with-64-bit-indices=0 and =1 variants of PETSc.
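
  For example, a hedged configure sketch (the set of downloaded packages is illustrative; adjust to your system):
.vb
     ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --with-64-bit-indices=1
.ve
  Dropping --with-64-bit-indices=1 gives the 32-bit PetscInt variant of PETSc on top of the same selective 64-bit MUMPS build.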
3325 
3326   There are two modes for running MUMPS/PETSc with OpenMP:
3327 .vb
3328      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3329      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3330 .ve
3331 
3332 .vb
3333      Use -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as there are cores. For example,
3334      if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16".
3335 .ve
3336 
3337    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS) while still running the non-MUMPS part
3338    (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3339    (or `--with-hwloc`) and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3340    libraries, to really get performance you should have a multithreaded BLAS library such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3341    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
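
   A hedged configure sketch for this hybrid mode (the exact BLAS and MPI choices are system dependent):
.vb
     ./configure --with-openmp --download-hwloc --download-mumps --download-scalapack
.ve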
3342 
3343    If you run your code through a job submission system, there are caveats concerning MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
3344    processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
3345    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
3346    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might have been
3347    set by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
3348    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets.
3349    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
3350    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half of the
3351    cores on socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively across the two sockets, the
3352    problem does not occur. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
3353    For example, with the Slurm job scheduler one can use srun's --cpu-bind=verbose and -m block:block options to map consecutive MPI ranks to sockets and
3354    examine the resulting mapping, as in the sketch below.
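
   For example, a sketch of such a Slurm launch (the executable name ./test is illustrative):
.vb
     srun --cpu-bind=verbose -m block:block ./test -mat_mumps_use_omp_threads 16
.ve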
3355 
3356    PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3357    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts, as PETSc
3358    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3359 
3360    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3361 
3362 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3363 M*/
3364 
3365 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3366 {
3367   PetscFunctionBegin;
3368   *type = MATSOLVERMUMPS;
3369   PetscFunctionReturn(PETSC_SUCCESS);
3370 }
3371 
3372 /* MatGetFactor for Seq and MPI AIJ matrices */
3373 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3374 {
3375   Mat         B;
3376   Mat_MUMPS  *mumps;
3377   PetscBool   isSeqAIJ, isDiag, isDense;
3378   PetscMPIInt size;
3379 
3380   PetscFunctionBegin;
3381 #if defined(PETSC_USE_COMPLEX)
3382   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3383     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3384     *F = NULL;
3385     PetscFunctionReturn(PETSC_SUCCESS);
3386   }
3387 #endif
3388   /* Create the factorization matrix */
3389   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3390   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3391   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3392   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3393   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3394   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3395   PetscCall(MatSetUp(B));
3396 
3397   PetscCall(PetscNew(&mumps));
3398 
3399   B->ops->view    = MatView_MUMPS;
3400   B->ops->getinfo = MatGetInfo_MUMPS;
3401 
3402   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3403   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3404   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3405   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3406   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3407   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3408   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3409   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3410   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3411   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3412   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3413   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3414   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3415   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3416 
3417   if (ftype == MAT_FACTOR_LU) {
3418     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3419     B->factortype            = MAT_FACTOR_LU;
3420     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3421     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3422     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3423     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3424     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3425     mumps->sym = 0;
3426   } else {
3427     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3428     B->factortype                  = MAT_FACTOR_CHOLESKY;
3429     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3430     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3431     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
3432     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3433     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3434 #if defined(PETSC_USE_COMPLEX)
3435     mumps->sym = 2;
3436 #else
3437     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3438     else mumps->sym = 2;
3439 #endif
3440   }
3441 
3442   /* set solvertype */
3443   PetscCall(PetscFree(B->solvertype));
3444   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3445   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3446   if (size == 1) {
3447     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3448     B->canuseordering = PETSC_TRUE;
3449   }
3450   B->ops->destroy = MatDestroy_MUMPS;
3451   B->data         = (void *)mumps;
3452 
3453   *F               = B;
3454   mumps->id.job    = JOB_NULL;
3455   mumps->ICNTL_pre = NULL;
3456   mumps->CNTL_pre  = NULL;
3457   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3458   PetscFunctionReturn(PETSC_SUCCESS);
3459 }
3460 
3461 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3462 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3463 {
3464   Mat         B;
3465   Mat_MUMPS  *mumps;
3466   PetscBool   isSeqSBAIJ;
3467   PetscMPIInt size;
3468 
3469   PetscFunctionBegin;
3470 #if defined(PETSC_USE_COMPLEX)
3471   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3472     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3473     *F = NULL;
3474     PetscFunctionReturn(PETSC_SUCCESS);
3475   }
3476 #endif
3477   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3478   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3479   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3480   PetscCall(MatSetUp(B));
3481 
3482   PetscCall(PetscNew(&mumps));
3483   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3484   if (isSeqSBAIJ) {
3485     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3486   } else {
3487     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3488   }
3489 
3490   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3491   B->ops->view                   = MatView_MUMPS;
3492   B->ops->getinfo                = MatGetInfo_MUMPS;
3493 
3494   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3495   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3496   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3497   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3498   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3499   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3500   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3501   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3502   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3503   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3504   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3505   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3506   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3507   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3508 
3509   B->factortype = MAT_FACTOR_CHOLESKY;
3510 #if defined(PETSC_USE_COMPLEX)
3511   mumps->sym = 2;
3512 #else
3513   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3514   else mumps->sym = 2;
3515 #endif
3516 
3517   /* set solvertype */
3518   PetscCall(PetscFree(B->solvertype));
3519   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3520   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3521   if (size == 1) {
3522     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3523     B->canuseordering = PETSC_TRUE;
3524   }
3525   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3526   B->ops->destroy = MatDestroy_MUMPS;
3527   B->data         = (void *)mumps;
3528 
3529   *F               = B;
3530   mumps->id.job    = JOB_NULL;
3531   mumps->ICNTL_pre = NULL;
3532   mumps->CNTL_pre  = NULL;
3533   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3534   PetscFunctionReturn(PETSC_SUCCESS);
3535 }
3536 
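/* MatGetFactor for Seq and MPI BAIJ matrices */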
3537 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3538 {
3539   Mat         B;
3540   Mat_MUMPS  *mumps;
3541   PetscBool   isSeqBAIJ;
3542   PetscMPIInt size;
3543 
3544   PetscFunctionBegin;
3545   /* Create the factorization matrix */
3546   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3547   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3548   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3549   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3550   PetscCall(MatSetUp(B));
3551 
3552   PetscCall(PetscNew(&mumps));
3553   if (ftype == MAT_FACTOR_LU) {
3554     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3555     B->factortype            = MAT_FACTOR_LU;
3556     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3557     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3558     mumps->sym = 0;
3559     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3560   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3561 
3562   B->ops->view    = MatView_MUMPS;
3563   B->ops->getinfo = MatGetInfo_MUMPS;
3564 
3565   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3566   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3576   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3578   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3579 
3580   /* set solvertype */
3581   PetscCall(PetscFree(B->solvertype));
3582   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3583   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3584   if (size == 1) {
3585     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3586     B->canuseordering = PETSC_TRUE;
3587   }
3588   B->ops->destroy = MatDestroy_MUMPS;
3589   B->data         = (void *)mumps;
3590 
3591   *F               = B;
3592   mumps->id.job    = JOB_NULL;
3593   mumps->ICNTL_pre = NULL;
3594   mumps->CNTL_pre  = NULL;
3595   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3596   PetscFunctionReturn(PETSC_SUCCESS);
3597 }
3598 
3599 /* MatGetFactor for Seq and MPI SELL matrices */
3600 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3601 {
3602   Mat         B;
3603   Mat_MUMPS  *mumps;
3604   PetscBool   isSeqSELL;
3605   PetscMPIInt size;
3606 
3607   PetscFunctionBegin;
3608   /* Create the factorization matrix */
3609   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3610   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3611   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3612   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3613   PetscCall(MatSetUp(B));
3614 
3615   PetscCall(PetscNew(&mumps));
3616 
3617   B->ops->view    = MatView_MUMPS;
3618   B->ops->getinfo = MatGetInfo_MUMPS;
3619 
3620   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3621   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3622   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3623   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3624   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3625   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3626   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3627   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3628   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3629   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3630   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3631   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3632 
3633   if (ftype == MAT_FACTOR_LU) {
3634     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3635     B->factortype            = MAT_FACTOR_LU;
3636     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3637     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3638     mumps->sym = 0;
3639     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3640   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3641 
3642   /* set solvertype */
3643   PetscCall(PetscFree(B->solvertype));
3644   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3645   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3646   if (size == 1) {
3647     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3648     B->canuseordering = PETSC_TRUE;
3649   }
3650   B->ops->destroy = MatDestroy_MUMPS;
3651   B->data         = (void *)mumps;
3652 
3653   *F               = B;
3654   mumps->id.job    = JOB_NULL;
3655   mumps->ICNTL_pre = NULL;
3656   mumps->CNTL_pre  = NULL;
3657   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3658   PetscFunctionReturn(PETSC_SUCCESS);
3659 }
3660 
3661 /* MatGetFactor for MATNEST matrices */
3662 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3663 {
3664   Mat         B, **mats;
3665   Mat_MUMPS  *mumps;
3666   PetscInt    nr, nc;
3667   PetscMPIInt size;
3668   PetscBool   flg = PETSC_TRUE;
3669 
3670   PetscFunctionBegin;
3671 #if defined(PETSC_USE_COMPLEX)
3672   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3673     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3674     *F = NULL;
3675     PetscFunctionReturn(PETSC_SUCCESS);
3676   }
3677 #endif
3678 
3679   /* Return if some condition is not satisfied */
3680   *F = NULL;
3681   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3682   if (ftype == MAT_FACTOR_CHOLESKY) {
3683     IS       *rows, *cols;
3684     PetscInt *m, *M;
3685 
3686     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3687     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3688     PetscCall(MatNestGetISs(A, rows, cols));
3689     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3690     if (!flg) {
3691       PetscCall(PetscFree2(rows, cols));
3692       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3693       PetscFunctionReturn(PETSC_SUCCESS);
3694     }
3695     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3696     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3697     for (PetscInt r = 0; flg && r < nr; r++)
3698       for (PetscInt k = r + 1; flg && k < nr; k++)
3699         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3700     PetscCall(PetscFree2(m, M));
3701     PetscCall(PetscFree2(rows, cols));
3702     if (!flg) {
3703       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3704       PetscFunctionReturn(PETSC_SUCCESS);
3705     }
3706   }
3707 
3708   for (PetscInt r = 0; r < nr; r++) {
3709     for (PetscInt c = 0; c < nc; c++) {
3710       Mat       sub = mats[r][c];
3711       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;
3712 
3713       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3714       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3715       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3716       else {
3717         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3718         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3719       }
3720       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3721       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3722       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3723       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3724       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3725       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3726       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3727       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3728       if (ftype == MAT_FACTOR_CHOLESKY) {
3729         if (r == c) {
3730           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
3731             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3732             flg = PETSC_FALSE;
3733           }
3734         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3735           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3736           flg = PETSC_FALSE;
3737         }
3738       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3739         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3740         flg = PETSC_FALSE;
3741       }
3742     }
3743   }
3744   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3745 
3746   /* Create the factorization matrix */
3747   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3748   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3749   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3750   PetscCall(MatSetUp(B));
3751 
3752   PetscCall(PetscNew(&mumps));
3753 
3754   B->ops->view    = MatView_MUMPS;
3755   B->ops->getinfo = MatGetInfo_MUMPS;
3756 
3757   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3758   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3759   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3760   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3761   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3762   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3763   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3764   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3765   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3766   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3767   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3768   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3769   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3770   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3771 
3772   if (ftype == MAT_FACTOR_LU) {
3773     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3774     B->factortype            = MAT_FACTOR_LU;
3775     mumps->sym               = 0;
3776   } else {
3777     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3778     B->factortype                  = MAT_FACTOR_CHOLESKY;
3779 #if defined(PETSC_USE_COMPLEX)
3780     mumps->sym = 2;
3781 #else
3782     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3783     else mumps->sym = 2;
3784 #endif
3785   }
3786   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3787   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3788 
3789   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3790   if (size == 1) {
3791     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3792     B->canuseordering = PETSC_TRUE;
3793   }
3794 
3795   /* set solvertype */
3796   PetscCall(PetscFree(B->solvertype));
3797   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3798   B->ops->destroy = MatDestroy_MUMPS;
3799   B->data         = (void *)mumps;
3800 
3801   *F               = B;
3802   mumps->id.job    = JOB_NULL;
3803   mumps->ICNTL_pre = NULL;
3804   mumps->CNTL_pre  = NULL;
3805   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3806   PetscFunctionReturn(PETSC_SUCCESS);
3807 }
3808 
3809 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3810 {
3811   PetscFunctionBegin;
3812   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3813   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3814   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3815   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3816   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3817   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3818   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3819   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3820   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3821   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3822   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3823   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3824   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3825   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3826   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3827   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3828   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3829   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3830   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3831   PetscFunctionReturn(PETSC_SUCCESS);
3832 }
3833