xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision e8e188d2d8d8bcc000607a9a2a524e07d1c8a16a)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <petscsf.h>
7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
9 #include <../src/mat/impls/sell/mpi/mpisell.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15   #if defined(PETSC_USE_REAL_SINGLE)
16     #include <cmumps_c.h>
17   #else
18     #include <zmumps_c.h>
19   #endif
20 #else
21   #if defined(PETSC_USE_REAL_SINGLE)
22     #include <smumps_c.h>
23   #else
24     #include <dmumps_c.h>
25   #endif
26 #endif
27 EXTERN_C_END
28 #define JOB_INIT         -1
29 #define JOB_NULL         0
30 #define JOB_FACTSYMBOLIC 1
31 #define JOB_FACTNUMERIC  2
32 #define JOB_SOLVE        3
33 #define JOB_END          -2
34 
35 /* calls to MUMPS */
36 #if defined(PETSC_USE_COMPLEX)
37   #if defined(PETSC_USE_REAL_SINGLE)
38     #define MUMPS_c cmumps_c
39   #else
40     #define MUMPS_c zmumps_c
41   #endif
42 #else
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_c smumps_c
45   #else
46     #define MUMPS_c dmumps_c
47   #endif
48 #endif
49 
50 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
51    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
52    naming convention in PetscMPIInt, PetscBLASInt etc.
53 */
54 typedef MUMPS_INT PetscMUMPSInt;
55 
56 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
57   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking it is more reliable than a command-line macro */
58     #error "PETSc has not been tested with MUMPS built in full 64-bit mode, so we choose to error out"
59   #endif
60 #else
61   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
62     #error "PETSc has not been tested with MUMPS built in full 64-bit mode, so we choose to error out"
63   #endif
64 #endif
65 
66 #define MPIU_MUMPSINT       MPI_INT
67 #define PETSC_MUMPS_INT_MAX 2147483647
68 #define PETSC_MUMPS_INT_MIN -2147483648
69 
70 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
71 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
72 {
73   PetscFunctionBegin;
74 #if PetscDefined(USE_64BIT_INDICES)
75   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
76 #endif
77   *b = (PetscMUMPSInt)(a);
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* Put these utility routines here since they are only used in this file */
82 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
83 {
84   PetscInt  myval;
85   PetscBool myset;
86   PetscFunctionBegin;
87   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
88   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
89   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
90   if (set) *set = myset;
91   PetscFunctionReturn(PETSC_SUCCESS);
92 }
93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
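/* Illustrative sketch (not an excerpt from this file): inside a PetscOptionsBegin()/PetscOptionsEnd()
   block, a MUMPS control parameter could be read with the macro above roughly as follows; the option
   name and help text here are only examples.

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
     if (flg) mumps->id.ICNTL(4) = icntl;
*/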
94 
95 /* If using PETSc OpenMP support, we call MUMPS only on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks may run on */
96 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
97   #define PetscMUMPS_c(mumps) \
98     do { \
99       if (mumps->use_petsc_omp_support) { \
100         if (mumps->is_omp_master) { \
101           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
102           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
103           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
104           PetscCall(PetscFPTrapPop()); \
105           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
106         } \
107         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
108         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is rank-specific; \
109          we Bcast the full info/rinfo arrays from the master anyway, but only info[1] (an error code) is          \
110          really meaningful on the other ranks, since the remaining entries have no easy translation between       \
111          omp_comm and petsc_comm. See the MUMPS-5.1.2 manual p82. omp_comm is a small shared-memory communicator, \
112          hence doing multiple Bcast calls as shown below is OK. */ \
113         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
116         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
117       } else { \
118         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
119         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
120         PetscCall(PetscFPTrapPop()); \
121       } \
122     } while (0)
123 #else
124   #define PetscMUMPS_c(mumps) \
125     do { \
126       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
127       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
128       PetscCall(PetscFPTrapPop()); \
129     } while (0)
130 #endif
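/* A minimal sketch of how PetscMUMPS_c() is typically driven (see e.g. MatDestroy_MUMPS() below);
   the surrounding error handling here is illustrative only.

     mumps->id.job = JOB_FACTNUMERIC;   // select the MUMPS phase to run
     PetscMUMPS_c(mumps);               // call MUMPS (possibly only on the OMP master ranks)
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
*/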
131 
132 /* declare MumpsScalar */
133 #if defined(PETSC_USE_COMPLEX)
134   #if defined(PETSC_USE_REAL_SINGLE)
135     #define MumpsScalar mumps_complex
136   #else
137     #define MumpsScalar mumps_double_complex
138   #endif
139 #else
140   #define MumpsScalar PetscScalar
141 #endif
142 
143 /* macros so that indices match the 1-based numbering used in the MUMPS documentation */
144 #define ICNTL(I)  icntl[(I)-1]
145 #define CNTL(I)   cntl[(I)-1]
146 #define INFOG(I)  infog[(I)-1]
147 #define INFO(I)   info[(I)-1]
148 #define RINFOG(I) rinfog[(I)-1]
149 #define RINFO(I)  rinfo[(I)-1]
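/* Example: with the macros above, mumps->id.ICNTL(7) refers to the C array entry id.icntl[6],
   so assignments in this file can use the same 1-based numbering as the MUMPS documentation,
   e.g. (illustrative only)

     mumps->id.ICNTL(7) = 2;   // writes MUMPS ICNTL(7), stored in id.icntl[6]
*/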
150 
151 typedef struct Mat_MUMPS Mat_MUMPS;
152 struct Mat_MUMPS {
153 #if defined(PETSC_USE_COMPLEX)
154   #if defined(PETSC_USE_REAL_SINGLE)
155   CMUMPS_STRUC_C id;
156   #else
157   ZMUMPS_STRUC_C id;
158   #endif
159 #else
160   #if defined(PETSC_USE_REAL_SINGLE)
161   SMUMPS_STRUC_C id;
162   #else
163   DMUMPS_STRUC_C id;
164   #endif
165 #endif
166 
167   MatStructure   matstruc;
168   PetscMPIInt    myid, petsc_size;
169   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
170   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
171   PetscInt64     nnz;             /* number of nonzeros; the MUMPS documentation calls this type "selective 64-bit" */
172   PetscMUMPSInt  sym;
173   MPI_Comm       mumps_comm;
174   PetscMUMPSInt *ICNTL_pre;
175   PetscReal     *CNTL_pre;
176   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
177   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
178   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
179   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
180 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
181   PetscInt    *rhs_nrow, max_nrhs;
182   PetscMPIInt *rhs_recvcounts, *rhs_disps;
183   PetscScalar *rhs_loc, *rhs_recvbuf;
184 #endif
185   Vec            b_seq, x_seq;
186   PetscInt       ninfo, *info; /* which INFO to display */
187   PetscInt       sizeredrhs;
188   PetscScalar   *schur_sol;
189   PetscInt       schur_sizesol;
190   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
191   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
192   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
193 
194   /* Support for MATNEST */
195   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
196   PetscInt64  *nest_vals_start;
197   PetscScalar *nest_vals;
198 
199   /* stuff used by petsc/mumps OpenMP support*/
200   PetscBool    use_petsc_omp_support;
201   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
202   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
203   PetscInt64  *recvcount;            /* nnz counts from the ranks of omp_comm, gathered on the omp master */
204   PetscMPIInt  tag, omp_comm_size;
205   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
206   MPI_Request *reqs;
207 };
208 
209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
210    Here, nrow is the number of rows, ia[] is the row pointer array and ja[] is the column index array.
211  */
212 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
213 {
214   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
215 
216   PetscFunctionBegin;
217 #if defined(PETSC_USE_64BIT_INDICES)
218   {
219     PetscInt i;
220     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
221       PetscCall(PetscFree(mumps->ia_alloc));
222       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
223       mumps->cur_ilen = nrow + 1;
224     }
225     if (nnz > mumps->cur_jlen) {
226       PetscCall(PetscFree(mumps->ja_alloc));
227       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
228       mumps->cur_jlen = nnz;
229     }
230     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
231     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
232     *ia_mumps = mumps->ia_alloc;
233     *ja_mumps = mumps->ja_alloc;
234   }
235 #else
236   *ia_mumps = ia;
237   *ja_mumps = ja;
238 #endif
239   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
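/* A usage sketch (hypothetical variable names): cast a 1-based CSR obtained from MatGetRowIJ()
   into MUMPS_INT arrays before handing it to MUMPS, e.g. for a sparse right-hand side.

     PetscInt        n;
     const PetscInt *ia, *ja;
     PetscBool       done;
     PetscMUMPSInt  *ia_m, *ja_m, nz_m;

     PetscCall(MatGetRowIJ(B, 1, PETSC_FALSE, PETSC_FALSE, &n, &ia, &ja, &done));
     PetscCall(PetscMUMPSIntCSRCast(mumps, n, (PetscInt *)ia, (PetscInt *)ja, &ia_m, &ja_m, &nz_m));
     ...
     PetscCall(MatRestoreRowIJ(B, 1, PETSC_FALSE, PETSC_FALSE, &n, &ia, &ja, &done));
*/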
242 
243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
244 {
245   PetscFunctionBegin;
246   PetscCall(PetscFree(mumps->id.listvar_schur));
247   PetscCall(PetscFree(mumps->id.redrhs));
248   PetscCall(PetscFree(mumps->schur_sol));
249   mumps->id.size_schur = 0;
250   mumps->id.schur_lld  = 0;
251   mumps->id.ICNTL(19)  = 0;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 /* solve with rhs in mumps->id.redrhs and return in the same location */
256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
257 {
258   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
259   Mat                  S, B, X;
260   MatFactorSchurStatus schurstatus;
261   PetscInt             sizesol;
262 
263   PetscFunctionBegin;
264   PetscCall(MatFactorFactorizeSchurComplement(F));
265   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
266   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
267   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
269   PetscCall(MatBindToCPU(B, S->boundtocpu));
270 #endif
271   switch (schurstatus) {
272   case MAT_FACTOR_SCHUR_FACTORED:
273     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
274     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
276     PetscCall(MatBindToCPU(X, S->boundtocpu));
277 #endif
278     if (!mumps->id.ICNTL(9)) { /* transpose solve */
279       PetscCall(MatMatSolveTranspose(S, B, X));
280     } else {
281       PetscCall(MatMatSolve(S, B, X));
282     }
283     break;
284   case MAT_FACTOR_SCHUR_INVERTED:
285     sizesol = mumps->id.nrhs * mumps->id.size_schur;
286     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
287       PetscCall(PetscFree(mumps->schur_sol));
288       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
289       mumps->schur_sizesol = sizesol;
290     }
291     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
292     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
294     PetscCall(MatBindToCPU(X, S->boundtocpu));
295 #endif
296     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
297     if (!mumps->id.ICNTL(9)) { /* transpose solve */
298       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
299     } else {
300       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
301     }
302     PetscCall(MatProductSetFromOptions(X));
303     PetscCall(MatProductSymbolic(X));
304     PetscCall(MatProductNumeric(X));
305 
306     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
307     break;
308   default:
309     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
310   }
311   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
312   PetscCall(MatDestroy(&B));
313   PetscCall(MatDestroy(&X));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
318 {
319   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
320 
321   PetscFunctionBegin;
322   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
323     PetscFunctionReturn(PETSC_SUCCESS);
324   }
325   if (!expansion) { /* prepare for the condensation step */
326     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
327     /* allocate MUMPS internal array to store reduced right-hand sides */
328     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
329       PetscCall(PetscFree(mumps->id.redrhs));
330       mumps->id.lredrhs = mumps->id.size_schur;
331       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
332       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
333     }
334   } else { /* prepare for the expansion step */
335     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
336     PetscCall(MatMumpsSolveSchur_Private(F));
337     mumps->id.ICNTL(26) = 2; /* expansion phase */
338     PetscMUMPS_c(mumps);
339     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
340     /* restore defaults */
341     mumps->id.ICNTL(26) = -1;
342     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
343     if (mumps->id.nrhs > 1) {
344       PetscCall(PetscFree(mumps->id.redrhs));
345       mumps->id.lredrhs = 0;
346       mumps->sizeredrhs = 0;
347     }
348   }
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
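/* A rough sketch of how the two calls to MatMumpsHandleSchur_Private() bracket a MUMPS solve when a Schur
   complement has been requested (ICNTL(26) values follow the MUMPS documentation; illustrative only):

     mumps->id.ICNTL(26) = 1;                                 // condensation: MUMPS returns the reduced RHS in id.redrhs
     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_FALSE));  // allocate id.redrhs
     PetscMUMPS_c(mumps);                                     // MUMPS solve producing the reduced RHS
     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_TRUE));   // solve the Schur system, then expansion with ICNTL(26) = 2
*/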
351 
352 /*
353   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
354 
355   input:
356     A       - matrix in aij,baij or sbaij format
357     shift   - 0: C style output triple; 1: Fortran style output triple.
358     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
359               MAT_REUSE_MATRIX:   only the values in v array are updated
360   output:
361     nnz     - dim of r, c, and v (number of local nonzero entries of A)
362     r, c, v - row and col index, matrix values (matrix triples)
363 
364   The returned arrays r and c are obtained with a single PetscMalloc2() and are later freed with PetscFree2(mumps->irn, mumps->jcn)
365   in MatDestroy_MUMPS(). The array v sometimes points directly into the matrix's own data and sometimes is a separate allocation
366   kept in mumps->val_alloc, which is why the allocations cannot easily be merged into a single PetscMalloc3().
367 
368  */
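/* Small worked example (illustrative): for the 2x2 sequential AIJ matrix
       [ 1 2 ]
       [ 0 3 ]
   with shift = 1 (Fortran-style indexing, which MUMPS expects), the routine below produces
       nnz = 3,  irn = {1, 1, 2},  jcn = {1, 2, 2},  val -> {1, 2, 3}
   where val points directly at the AIJ data array rather than at a copy. */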
369 
370 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
371 {
372   const PetscScalar *av;
373   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
374   PetscInt64         nz, rnz, i, j, k;
375   PetscMUMPSInt     *row, *col;
376   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
377 
378   PetscFunctionBegin;
379   PetscCall(MatSeqAIJGetArrayRead(A, &av));
380   if (reuse == MAT_INITIAL_MATRIX) {
381     nz = aa->nz;
382     ai = aa->i;
383     aj = aa->j;
384     PetscCall(PetscMalloc2(nz, &row, nz, &col));
385     for (i = k = 0; i < M; i++) {
386       rnz = ai[i + 1] - ai[i];
387       ajj = aj + ai[i];
388       for (j = 0; j < rnz; j++) {
389         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
390         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
391         k++;
392       }
393     }
394     mumps->val = (PetscScalar *)av;
395     mumps->irn = row;
396     mumps->jcn = col;
397     mumps->nnz = nz;
398   } else PetscCall(PetscArraycpy(mumps->val, av, aa->nz));
399   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
404 {
405   PetscInt64     nz, i, j, k, r;
406   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
407   PetscMUMPSInt *row, *col;
408 
409   PetscFunctionBegin;
410   nz = a->sliidx[a->totalslices];
411   if (reuse == MAT_INITIAL_MATRIX) {
412     PetscCall(PetscMalloc2(nz, &row, nz, &col));
413     for (i = k = 0; i < a->totalslices; i++) {
414       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
415     }
416     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
417     mumps->irn = row;
418     mumps->jcn = col;
419     mumps->nnz = nz;
420     mumps->val = a->val;
421   } else PetscCall(PetscArraycpy(mumps->val, a->val, nz));
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424 
425 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
426 {
427   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
428   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
429   PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
430   PetscInt        bs;
431   PetscMUMPSInt  *row, *col;
432 
433   PetscFunctionBegin;
434   if (reuse == MAT_INITIAL_MATRIX) {
435     PetscCall(MatGetBlockSize(A, &bs));
436     M  = A->rmap->N / bs;
437     ai = aa->i;
438     aj = aa->j;
439     PetscCall(PetscMalloc2(nz, &row, nz, &col));
440     for (i = 0; i < M; i++) {
441       ajj = aj + ai[i];
442       rnz = ai[i + 1] - ai[i];
443       for (k = 0; k < rnz; k++) {
444         for (j = 0; j < bs; j++) {
445           for (m = 0; m < bs; m++) {
446             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
447             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
448             idx++;
449           }
450         }
451       }
452     }
453     mumps->irn = row;
454     mumps->jcn = col;
455     mumps->nnz = nz;
456     mumps->val = aa->a;
457   } else PetscCall(PetscArraycpy(mumps->val, aa->a, nz));
458   PetscFunctionReturn(PETSC_SUCCESS);
459 }
460 
461 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
462 {
463   const PetscInt *ai, *aj, *ajj;
464   PetscInt        bs;
465   PetscInt64      nz, rnz, i, j, k, m;
466   PetscMUMPSInt  *row, *col;
467   PetscScalar    *val;
468   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
469   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
470 #if defined(PETSC_USE_COMPLEX)
471   PetscBool isset, hermitian;
472 #endif
473 
474   PetscFunctionBegin;
475 #if defined(PETSC_USE_COMPLEX)
476   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
477   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
478 #endif
479   ai = aa->i;
480   aj = aa->j;
481   PetscCall(MatGetBlockSize(A, &bs));
482   if (reuse == MAT_INITIAL_MATRIX) {
483     const PetscInt64 alloc_size = aa->nz * bs2;
484 
485     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
486     if (bs > 1) {
487       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
488       mumps->val = mumps->val_alloc;
489     } else {
490       mumps->val = aa->a;
491     }
492     mumps->irn = row;
493     mumps->jcn = col;
494   } else {
495     row = mumps->irn;
496     col = mumps->jcn;
497   }
498   val = mumps->val;
499 
500   nz = 0;
501   if (bs > 1) {
502     for (i = 0; i < mbs; i++) {
503       rnz = ai[i + 1] - ai[i];
504       ajj = aj + ai[i];
505       for (j = 0; j < rnz; j++) {
506         for (k = 0; k < bs; k++) {
507           for (m = 0; m < bs; m++) {
508             if (ajj[j] > i || k >= m) {
509               if (reuse == MAT_INITIAL_MATRIX) {
510                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
511                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
512               }
513               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
514             }
515           }
516         }
517       }
518     }
519   } else if (reuse == MAT_INITIAL_MATRIX) {
520     for (i = 0; i < mbs; i++) {
521       rnz = ai[i + 1] - ai[i];
522       ajj = aj + ai[i];
523       for (j = 0; j < rnz; j++) {
524         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
525         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
526         nz++;
527       }
528     }
529     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
530   } else PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX */
531   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
532   PetscFunctionReturn(PETSC_SUCCESS);
533 }
534 
535 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
536 {
537   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
538   PetscInt64         nz, rnz, i, j;
539   const PetscScalar *av, *v1;
540   PetscScalar       *val;
541   PetscMUMPSInt     *row, *col;
542   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
543   PetscBool          missing;
544 #if defined(PETSC_USE_COMPLEX)
545   PetscBool hermitian, isset;
546 #endif
547 
548   PetscFunctionBegin;
549 #if defined(PETSC_USE_COMPLEX)
550   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
551   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
552 #endif
553   PetscCall(MatSeqAIJGetArrayRead(A, &av));
554   ai    = aa->i;
555   aj    = aa->j;
556   adiag = aa->diag;
557   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
558   if (reuse == MAT_INITIAL_MATRIX) {
559     /* count nz in the upper triangular part of A */
560     nz = 0;
561     if (missing) {
562       for (i = 0; i < M; i++) {
563         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
564           for (j = ai[i]; j < ai[i + 1]; j++) {
565             if (aj[j] < i) continue;
566             nz++;
567           }
568         } else {
569           nz += ai[i + 1] - adiag[i];
570         }
571       }
572     } else {
573       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
574     }
575     PetscCall(PetscMalloc2(nz, &row, nz, &col));
576     PetscCall(PetscMalloc1(nz, &val));
577     mumps->nnz = nz;
578     mumps->irn = row;
579     mumps->jcn = col;
580     mumps->val = mumps->val_alloc = val;
581 
582     nz = 0;
583     if (missing) {
584       for (i = 0; i < M; i++) {
585         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
586           for (j = ai[i]; j < ai[i + 1]; j++) {
587             if (aj[j] < i) continue;
588             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
589             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
590             val[nz] = av[j];
591             nz++;
592           }
593         } else {
594           rnz = ai[i + 1] - adiag[i];
595           ajj = aj + adiag[i];
596           v1  = av + adiag[i];
597           for (j = 0; j < rnz; j++) {
598             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
599             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
600             val[nz++] = v1[j];
601           }
602         }
603       }
604     } else {
605       for (i = 0; i < M; i++) {
606         rnz = ai[i + 1] - adiag[i];
607         ajj = aj + adiag[i];
608         v1  = av + adiag[i];
609         for (j = 0; j < rnz; j++) {
610           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
611           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
612           val[nz++] = v1[j];
613         }
614       }
615     }
616   } else {
617     nz  = 0;
618     val = mumps->val;
619     if (missing) {
620       for (i = 0; i < M; i++) {
621         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
622           for (j = ai[i]; j < ai[i + 1]; j++) {
623             if (aj[j] < i) continue;
624             val[nz++] = av[j];
625           }
626         } else {
627           rnz = ai[i + 1] - adiag[i];
628           v1  = av + adiag[i];
629           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
630         }
631       }
632     } else {
633       for (i = 0; i < M; i++) {
634         rnz = ai[i + 1] - adiag[i];
635         v1  = av + adiag[i];
636         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
637       }
638     }
639   }
640   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
641   PetscFunctionReturn(PETSC_SUCCESS);
642 }
643 
644 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
645 {
646   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
647   PetscInt           bs;
648   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
649   PetscMUMPSInt     *row, *col;
650   const PetscScalar *av, *bv, *v1, *v2;
651   PetscScalar       *val;
652   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
653   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
654   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
655   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
656 #if defined(PETSC_USE_COMPLEX)
657   PetscBool hermitian, isset;
658 #endif
659 
660   PetscFunctionBegin;
661 #if defined(PETSC_USE_COMPLEX)
662   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
663   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
664 #endif
665   PetscCall(MatGetBlockSize(A, &bs));
666   rstart = A->rmap->rstart;
667   ai     = aa->i;
668   aj     = aa->j;
669   bi     = bb->i;
670   bj     = bb->j;
671   av     = aa->a;
672   bv     = bb->a;
673 
674   garray = mat->garray;
675 
676   if (reuse == MAT_INITIAL_MATRIX) {
677     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
678     PetscCall(PetscMalloc2(nz, &row, nz, &col));
679     PetscCall(PetscMalloc1(nz, &val));
680     /* cannot determine the exact mumps->nnz yet because of the SBAIJ block structure */
681     mumps->irn = row;
682     mumps->jcn = col;
683     mumps->val = mumps->val_alloc = val;
684   } else {
685     val = mumps->val;
686   }
687 
688   jj   = 0;
689   irow = rstart;
690   for (i = 0; i < mbs; i++) {
691     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
692     countA = ai[i + 1] - ai[i];
693     countB = bi[i + 1] - bi[i];
694     bjj    = bj + bi[i];
695     v1     = av + ai[i] * bs2;
696     v2     = bv + bi[i] * bs2;
697 
698     if (bs > 1) {
699       /* A-part */
700       for (j = 0; j < countA; j++) {
701         for (k = 0; k < bs; k++) {
702           for (m = 0; m < bs; m++) {
703             if (rstart + ajj[j] * bs > irow || k >= m) {
704               if (reuse == MAT_INITIAL_MATRIX) {
705                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
706                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
707               }
708               val[jj++] = v1[j * bs2 + m + k * bs];
709             }
710           }
711         }
712       }
713 
714       /* B-part */
715       for (j = 0; j < countB; j++) {
716         for (k = 0; k < bs; k++) {
717           for (m = 0; m < bs; m++) {
718             if (reuse == MAT_INITIAL_MATRIX) {
719               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
720               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
721             }
722             val[jj++] = v2[j * bs2 + m + k * bs];
723           }
724         }
725       }
726     } else {
727       /* A-part */
728       for (j = 0; j < countA; j++) {
729         if (reuse == MAT_INITIAL_MATRIX) {
730           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
731           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
732         }
733         val[jj++] = v1[j];
734       }
735 
736       /* B-part */
737       for (j = 0; j < countB; j++) {
738         if (reuse == MAT_INITIAL_MATRIX) {
739           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
740           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
741         }
742         val[jj++] = v2[j];
743       }
744     }
745     irow += bs;
746   }
747   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
751 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
752 {
753   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
754   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
755   PetscMUMPSInt     *row, *col;
756   const PetscScalar *av, *bv, *v1, *v2;
757   PetscScalar       *val;
758   Mat                Ad, Ao;
759   Mat_SeqAIJ        *aa;
760   Mat_SeqAIJ        *bb;
761 
762   PetscFunctionBegin;
763   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
764   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
765   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
766 
767   aa = (Mat_SeqAIJ *)(Ad)->data;
768   bb = (Mat_SeqAIJ *)(Ao)->data;
769   ai = aa->i;
770   aj = aa->j;
771   bi = bb->i;
772   bj = bb->j;
773 
774   rstart = A->rmap->rstart;
775   cstart = A->cmap->rstart;
776 
777   if (reuse == MAT_INITIAL_MATRIX) {
778     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
779     PetscCall(PetscMalloc2(nz, &row, nz, &col));
780     PetscCall(PetscMalloc1(nz, &val));
781     mumps->nnz = nz;
782     mumps->irn = row;
783     mumps->jcn = col;
784     mumps->val = mumps->val_alloc = val;
785   } else {
786     val = mumps->val;
787   }
788 
789   jj   = 0;
790   irow = rstart;
791   for (i = 0; i < m; i++) {
792     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
793     countA = ai[i + 1] - ai[i];
794     countB = bi[i + 1] - bi[i];
795     bjj    = bj + bi[i];
796     v1     = av + ai[i];
797     v2     = bv + bi[i];
798 
799     /* A-part */
800     for (j = 0; j < countA; j++) {
801       if (reuse == MAT_INITIAL_MATRIX) {
802         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
803         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
804       }
805       val[jj++] = v1[j];
806     }
807 
808     /* B-part */
809     for (j = 0; j < countB; j++) {
810       if (reuse == MAT_INITIAL_MATRIX) {
811         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
812         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
813       }
814       val[jj++] = v2[j];
815     }
816     irow++;
817   }
818   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
819   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
820   PetscFunctionReturn(PETSC_SUCCESS);
821 }
822 
823 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
824 {
825   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
826   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
827   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
828   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
829   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
830   const PetscInt     bs2 = mat->bs2;
831   PetscInt           bs;
832   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
833   PetscMUMPSInt     *row, *col;
834   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
835   PetscScalar       *val;
836 
837   PetscFunctionBegin;
838   PetscCall(MatGetBlockSize(A, &bs));
839   if (reuse == MAT_INITIAL_MATRIX) {
840     nz = bs2 * (aa->nz + bb->nz);
841     PetscCall(PetscMalloc2(nz, &row, nz, &col));
842     PetscCall(PetscMalloc1(nz, &val));
843     mumps->nnz = nz;
844     mumps->irn = row;
845     mumps->jcn = col;
846     mumps->val = mumps->val_alloc = val;
847   } else {
848     val = mumps->val;
849   }
850 
851   jj   = 0;
852   irow = rstart;
853   for (i = 0; i < mbs; i++) {
854     countA = ai[i + 1] - ai[i];
855     countB = bi[i + 1] - bi[i];
856     ajj    = aj + ai[i];
857     bjj    = bj + bi[i];
858     v1     = av + bs2 * ai[i];
859     v2     = bv + bs2 * bi[i];
860 
861     idx = 0;
862     /* A-part */
863     for (k = 0; k < countA; k++) {
864       for (j = 0; j < bs; j++) {
865         for (n = 0; n < bs; n++) {
866           if (reuse == MAT_INITIAL_MATRIX) {
867             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
868             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
869           }
870           val[jj++] = v1[idx++];
871         }
872       }
873     }
874 
875     idx = 0;
876     /* B-part */
877     for (k = 0; k < countB; k++) {
878       for (j = 0; j < bs; j++) {
879         for (n = 0; n < bs; n++) {
880           if (reuse == MAT_INITIAL_MATRIX) {
881             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
882             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
883           }
884           val[jj++] = v2[idx++];
885         }
886       }
887     }
888     irow += bs;
889   }
890   PetscFunctionReturn(PETSC_SUCCESS);
891 }
892 
893 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
894 {
895   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
896   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
897   PetscMUMPSInt     *row, *col;
898   const PetscScalar *av, *bv, *v1, *v2;
899   PetscScalar       *val;
900   Mat                Ad, Ao;
901   Mat_SeqAIJ        *aa;
902   Mat_SeqAIJ        *bb;
903 #if defined(PETSC_USE_COMPLEX)
904   PetscBool hermitian, isset;
905 #endif
906 
907   PetscFunctionBegin;
908 #if defined(PETSC_USE_COMPLEX)
909   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
910   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
911 #endif
912   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
913   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
914   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
915 
916   aa    = (Mat_SeqAIJ *)(Ad)->data;
917   bb    = (Mat_SeqAIJ *)(Ao)->data;
918   ai    = aa->i;
919   aj    = aa->j;
920   adiag = aa->diag;
921   bi    = bb->i;
922   bj    = bb->j;
923 
924   rstart = A->rmap->rstart;
925 
926   if (reuse == MAT_INITIAL_MATRIX) {
927     nza = 0; /* number of upper-triangular entries in the diagonal block Ad, including the diagonal */
928     nzb = 0; /* number of upper-triangular entries in the off-diagonal block Ao */
929     for (i = 0; i < m; i++) {
930       nza += (ai[i + 1] - adiag[i]);
931       countB = bi[i + 1] - bi[i];
932       bjj    = bj + bi[i];
933       for (j = 0; j < countB; j++) {
934         if (garray[bjj[j]] > rstart) nzb++;
935       }
936     }
937 
938     nz = nza + nzb; /* total nz of upper triangular part of mat */
939     PetscCall(PetscMalloc2(nz, &row, nz, &col));
940     PetscCall(PetscMalloc1(nz, &val));
941     mumps->nnz = nz;
942     mumps->irn = row;
943     mumps->jcn = col;
944     mumps->val = mumps->val_alloc = val;
945   } else {
946     val = mumps->val;
947   }
948 
949   jj   = 0;
950   irow = rstart;
951   for (i = 0; i < m; i++) {
952     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
953     v1     = av + adiag[i];
954     countA = ai[i + 1] - adiag[i];
955     countB = bi[i + 1] - bi[i];
956     bjj    = bj + bi[i];
957     v2     = bv + bi[i];
958 
959     /* A-part */
960     for (j = 0; j < countA; j++) {
961       if (reuse == MAT_INITIAL_MATRIX) {
962         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
963         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
964       }
965       val[jj++] = v1[j];
966     }
967 
968     /* B-part */
969     for (j = 0; j < countB; j++) {
970       if (garray[bjj[j]] > rstart) {
971         if (reuse == MAT_INITIAL_MATRIX) {
972           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
973           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
974         }
975         val[jj++] = v2[j];
976       }
977     }
978     irow++;
979   }
980   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
981   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
982   PetscFunctionReturn(PETSC_SUCCESS);
983 }
984 
985 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
986 {
987   Mat     **mats;
988   PetscInt  nr, nc;
989   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
990 
991   PetscFunctionBegin;
992   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
993   if (reuse == MAT_INITIAL_MATRIX) {
994     PetscMUMPSInt *irns, *jcns;
995     PetscScalar   *vals;
996     PetscInt64     totnnz, cumnnz, maxnnz;
997     PetscInt      *pjcns_w;
998     IS            *rows, *cols;
999     PetscInt     **rows_idx, **cols_idx;
1000 
1001     cumnnz = 0;
1002     maxnnz = 0;
1003     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1004     for (PetscInt r = 0; r < nr; r++) {
1005       for (PetscInt c = 0; c < nc; c++) {
1006         Mat sub = mats[r][c];
1007 
1008         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1009         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1010         if (sub) {
1011           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1012           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE;
1013           MatInfo   info;
1014 
1015           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1016           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1017           else {
1018             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1019             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1020           }
1021           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1022           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1023           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1024           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1025           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1026           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1027 
1028           if (chol) {
1029             if (r == c) {
1030               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1031               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1032               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1033               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1034             } else {
1035               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1036               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1037               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1038               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1039             }
1040           } else {
1041             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1042             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1043             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1044             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1045           }
1046           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1047           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1048           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1049           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1050           maxnnz = PetscMax(maxnnz, info.nz_used);
1051         }
1052       }
1053     }
1054 
1055     /* Allocate total COO */
1056     totnnz = cumnnz;
1057     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1058     PetscCall(PetscMalloc1(totnnz, &vals));
1059 
1060     /* Handle row and column index maps:
1061        we directly map rows and use an SF for the columns */
1062     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1063     PetscCall(MatNestGetISs(A, rows, cols));
1064     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1065     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1066     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1067     else (void)maxnnz;
1068 
1069     cumnnz = 0;
1070     for (PetscInt r = 0; r < nr; r++) {
1071       for (PetscInt c = 0; c < nc; c++) {
1072         Mat             sub  = mats[r][c];
1073         const PetscInt *ridx = rows_idx[r];
1074         const PetscInt *cidx = cols_idx[c];
1075         PetscInt        rst;
1076         PetscSF         csf;
1077         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1078         PetscLayout     cmap;
1079 
1080         mumps->nest_vals_start[r * nc + c] = cumnnz;
1081         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1082 
1083         /* Extract inner blocks if needed */
1084         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1085         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1086         else {
1087           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1088           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1089         }
1090         swap = (PetscBool)(isTrans || isHTrans);
1091 
1092         /* Get column layout to map off-process columns */
1093         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1094 
1095         /* Get row start to map on-process rows */
1096         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1097 
1098         /* Directly use the MUMPS data structure and use C ordering for now */
1099         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1100 
1101         /* Swap the role of rows and columns indices for transposed blocks
1102            since we need values with global final ordering */
1103         if (swap) {
1104           cidx = rows_idx[r];
1105           ridx = cols_idx[c];
1106         }
1107 
1108         /* Communicate column indices
1109            This could have been done with a single SF but it would have complicated the code a lot.
1110            But since we do it only once, we pay the price of setting up an SF for each block */
1111         if (PetscDefined(USE_64BIT_INDICES)) {
1112           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1113         } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
1114         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1115         PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1116         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1117         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1118         PetscCall(PetscSFDestroy(&csf));
1119 
1120         /* Import indices: use direct map for rows and mapped indices for columns */
1121         if (swap) {
1122           for (PetscInt k = 0; k < mumps->nnz; k++) {
1123             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1124             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1125           }
1126         } else {
1127           for (PetscInt k = 0; k < mumps->nnz; k++) {
1128             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1129             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1130           }
1131         }
1132 
1133         /* Import values to full COO */
1134         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1135         if (isHTrans) { /* conjugate the entries */
1136           PetscScalar *v = vals + cumnnz;
1137           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
1138         }
1139 
1140         /* Shift new starting point and sanity check */
1141         cumnnz += mumps->nnz;
1142         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1143 
1144         /* Free scratch memory */
1145         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1146         PetscCall(PetscFree(mumps->val_alloc));
1147         mumps->val = NULL;
1148         mumps->nnz = 0;
1149       }
1150     }
1151     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1152     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1153     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1154     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1155     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1156     mumps->nest_vals_start[nr * nc] = cumnnz;
1157 
1158     /* Set pointers for final MUMPS data structure */
1159     mumps->nest_vals = vals;
1160     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1161     mumps->val       = vals;
1162     mumps->irn       = irns;
1163     mumps->jcn       = jcns;
1164     mumps->nnz       = cumnnz;
1165   } else {
1166     PetscScalar *oval = mumps->nest_vals;
1167     for (PetscInt r = 0; r < nr; r++) {
1168       for (PetscInt c = 0; c < nc; c++) {
1169         PetscBool isTrans, isHTrans = PETSC_FALSE;
1170         Mat       sub  = mats[r][c];
1171         PetscInt  midx = r * nc + c;
1172 
1173         if (!mumps->nest_convert_to_triples[midx]) continue;
1174         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1175         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1176         else {
1177           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1178           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1179         }
1180         mumps->val = oval + mumps->nest_vals_start[midx];
1181         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1182         if (isHTrans) {
1183           PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1184           for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1185         }
1186       }
1187     }
1188     mumps->val = oval;
1189   }
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1194 {
1195   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1196 
1197   PetscFunctionBegin;
1198   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1199   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1200   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1201   PetscCall(VecDestroy(&mumps->b_seq));
1202   PetscCall(VecDestroy(&mumps->x_seq));
1203   PetscCall(PetscFree(mumps->id.perm_in));
1204   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1205   PetscCall(PetscFree(mumps->val_alloc));
1206   PetscCall(PetscFree(mumps->info));
1207   PetscCall(PetscFree(mumps->ICNTL_pre));
1208   PetscCall(PetscFree(mumps->CNTL_pre));
1209   PetscCall(MatMumpsResetSchur_Private(mumps));
1210   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1211     mumps->id.job = JOB_END;
1212     PetscMUMPS_c(mumps);
1213     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1214     if (mumps->mumps_comm != MPI_COMM_NULL) {
1215       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1216       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1217     }
1218   }
1219 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1220   if (mumps->use_petsc_omp_support) {
1221     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1222     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1223     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1224   }
1225 #endif
1226   PetscCall(PetscFree(mumps->ia_alloc));
1227   PetscCall(PetscFree(mumps->ja_alloc));
1228   PetscCall(PetscFree(mumps->recvcount));
1229   PetscCall(PetscFree(mumps->reqs));
1230   PetscCall(PetscFree(mumps->irhs_loc));
1231   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1232   PetscCall(PetscFree(mumps->nest_vals));
1233   PetscCall(PetscFree(A->data));
1234 
1235   /* clear composed functions */
1236   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1237   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1238   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1239   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1240   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1241   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1242   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1243   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1244   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1245   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1246   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1247   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1248   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1249   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1250   PetscFunctionReturn(PETSC_SUCCESS);
1251 }
1252 
1253 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1254 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1255 {
1256   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1257   const PetscMPIInt ompsize = mumps->omp_comm_size;
1258   PetscInt          i, m, M, rstart;
1259 
1260   PetscFunctionBegin;
1261   PetscCall(MatGetSize(A, &M, NULL));
1262   PetscCall(MatGetLocalSize(A, &m, NULL));
1263   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1264   if (ompsize == 1) {
1265     if (!mumps->irhs_loc) {
1266       mumps->nloc_rhs = m;
1267       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1268       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1269       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1270     }
1271     mumps->id.rhs_loc = (MumpsScalar *)array;
1272   } else {
1273 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1274     const PetscInt *ranges;
1275     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1276     MPI_Group       petsc_group, omp_group;
1277     PetscScalar    *recvbuf = NULL;
1278 
1279     if (mumps->is_omp_master) {
1280       /* Lazily initialize the omp stuff for distributed rhs */
1281       if (!mumps->irhs_loc) {
1282         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1283         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1284         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1285         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1286         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1287         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1288 
1289         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1290         mumps->nloc_rhs = 0;
1291         PetscCall(MatGetOwnershipRanges(A, &ranges));
1292         for (j = 0; j < ompsize; j++) {
1293           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1294           mumps->nloc_rhs += mumps->rhs_nrow[j];
1295         }
1296         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1297         for (j = k = 0; j < ompsize; j++) {
1298           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1299         }
1300 
1301         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1302         PetscCallMPI(MPI_Group_free(&petsc_group));
1303         PetscCallMPI(MPI_Group_free(&omp_group));
1304       }
1305 
1306       /* Realloc buffers when the current nrhs is bigger than any seen so far */
1307       if (nrhs > mumps->max_nrhs) {
1308         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1309         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1310         mumps->max_nrhs = nrhs;
1311       }
1312 
1313       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1314       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1315       mumps->rhs_disps[0] = 0;
1316       for (j = 1; j < ompsize; j++) {
1317         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1318         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1319       }
1320       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1321     }
1322 
1323     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1324     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1325 
1326     if (mumps->is_omp_master) {
1327       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
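        /* Layout sketch (an illustrative example, not executed code): suppose ompsize = 2, nrhs = 2,
           rhs_nrow[] = {2, 1} (so nloc_rhs = 3), rank 0 owning rows a0,a1 and rank 1 owning row b0.
           Each rank sends its rows column by column, so after MPI_Gatherv
             rhs_recvbuf = [a0(col0) a1(col0) a0(col1) a1(col1) | b0(col0) b0(col1)]
           while MUMPS expects a single column-major array with nloc_rhs rows,
             rhs_loc     = [a0(col0) a1(col0) b0(col0) | a0(col1) a1(col1) b0(col1)]
           The loop below interleaves the per-rank blocks accordingly. */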
1328         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1329         for (j = 0; j < ompsize; j++) {
1330           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1331           dst                    = dstbase;
1332           for (i = 0; i < nrhs; i++) {
1333             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1334             src += mumps->rhs_nrow[j];
1335             dst += mumps->nloc_rhs;
1336           }
1337           dstbase += mumps->rhs_nrow[j];
1338         }
1339       }
1340       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1341     }
1342 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1343   }
1344   mumps->id.nrhs     = nrhs;
1345   mumps->id.nloc_rhs = mumps->nloc_rhs;
1346   mumps->id.lrhs_loc = mumps->nloc_rhs;
1347   mumps->id.irhs_loc = mumps->irhs_loc;
1348   PetscFunctionReturn(PETSC_SUCCESS);
1349 }
1350 
1351 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1352 {
1353   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1354   const PetscScalar *rarray = NULL;
1355   PetscScalar       *array;
1356   IS                 is_iden, is_petsc;
1357   PetscInt           i;
1358   PetscBool          second_solve = PETSC_FALSE;
1359   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1360 
1361   PetscFunctionBegin;
1362   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1363                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1364                                    &cite1));
1365   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1366                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1367                                    &cite2));
1368 
1369   if (A->factorerrortype) {
1370     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1371     PetscCall(VecSetInf(x));
1372     PetscFunctionReturn(PETSC_SUCCESS);
1373   }
1374 
1375   mumps->id.nrhs = 1;
1376   if (mumps->petsc_size > 1) {
1377     if (mumps->ICNTL20 == 10) {
1378       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1379       PetscCall(VecGetArrayRead(b, &rarray));
1380       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1381     } else {
1382       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1383       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1384       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1385       if (!mumps->myid) {
1386         PetscCall(VecGetArray(mumps->b_seq, &array));
1387         mumps->id.rhs = (MumpsScalar *)array;
1388       }
1389     }
1390   } else {                   /* petsc_size == 1 */
1391     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1392     PetscCall(VecCopy(b, x));
1393     PetscCall(VecGetArray(x, &array));
1394     mumps->id.rhs = (MumpsScalar *)array;
1395   }
1396 
1397   /*
1398      Handle the condensation step of the Schur complement (if any).
1399      We set ICNTL(26) = -1 by default when Schur indices have been provided by the user since,
1400      according to the MUMPS (5.0.0) manual, any other value can be harmful during the factorization phase.
1401      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve solve the full system.
1402      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1403   */
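  /* Reading aid for the branch below. Per the MUMPS users manual, ICNTL(26)=1 restricts the solve phase to the
     condensation (reduction) step onto the Schur variables, and ICNTL(26)=2 performs the expansion step from a
     reduced solution stored in redrhs. When the user has not selected a usable value (ICNTL(26) < 0 or > 2), this
     interface solves the full system itself: it calls MatMumpsHandleSchur_Private() before the MUMPS solve
     (condensation) and once more after it with the expansion flag set; this is the second_solve path below. */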
1404   if (mumps->id.size_schur > 0) {
1405     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1406     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1407       second_solve = PETSC_TRUE;
1408       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1409       mumps->id.ICNTL(26) = 1; /* condensation phase */
1410     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1411   }
1412   /* solve phase */
1413   mumps->id.job = JOB_SOLVE;
1414   PetscMUMPS_c(mumps);
1415   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1416 
1417   /* handle expansion step of Schur complement (if any) */
1418   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1419   else if (mumps->id.ICNTL(26) == 1) {
1420     PetscCall(MatMumpsSolveSchur_Private(A));
1421     for (i = 0; i < mumps->id.size_schur; ++i) {
1422 #if !defined(PETSC_USE_COMPLEX)
1423       PetscScalar val = mumps->id.redrhs[i];
1424 #else
1425       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1426 #endif
1427       array[mumps->id.listvar_schur[i] - 1] = val;
1428     }
1429   }
1430 
1431   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1432     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1433       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1434       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1435     }
1436     if (!mumps->scat_sol) { /* create scatter scat_sol */
1437       PetscInt *isol2_loc = NULL;
1438       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1439       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1440       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1441       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1442       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1443       PetscCall(ISDestroy(&is_iden));
1444       PetscCall(ISDestroy(&is_petsc));
1445       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1446     }
1447 
1448     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1449     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1450   }
1451 
1452   if (mumps->petsc_size > 1) {
1453     if (mumps->ICNTL20 == 10) {
1454       PetscCall(VecRestoreArrayRead(b, &rarray));
1455     } else if (!mumps->myid) {
1456       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1457     }
1458   } else PetscCall(VecRestoreArray(x, &array));
1459 
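  /* Flop estimate for the solve: INFO(28) gives the relevant entry count; apparently, following the MUMPS
     convention for large counts, a negative value means the count is expressed in millions, hence the
     -1000000 factor below. */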
1460   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
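/* Per the MUMPS users manual, ICNTL(9)=1 (the default) makes the solve phase use A, while any other value makes
   it use A^T. Temporarily setting ICNTL(9)=0 below therefore turns the MatSolve_MUMPS() call into a solve with
   the transpose; the previous value is restored afterwards. */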
1464 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1465 {
1466   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1467   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1468 
1469   PetscFunctionBegin;
1470   mumps->id.ICNTL(9) = 0;
1471   PetscCall(MatSolve_MUMPS(A, b, x));
1472   mumps->id.ICNTL(9) = value;
1473   PetscFunctionReturn(PETSC_SUCCESS);
1474 }
1475 
1476 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1477 {
1478   Mat                Bt = NULL;
1479   PetscBool          denseX, denseB, flg, flgT;
1480   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1481   PetscInt           i, nrhs, M;
1482   PetscScalar       *array;
1483   const PetscScalar *rbray;
1484   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1485   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1486   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1487   IS                 is_to, is_from;
1488   PetscInt           k, proc, j, m, myrstart;
1489   const PetscInt    *rstart;
1490   Vec                v_mpi, msol_loc;
1491   VecScatter         scat_sol;
1492   Vec                b_seq;
1493   VecScatter         scat_rhs;
1494   PetscScalar       *aa;
1495   PetscInt           spnr, *ia, *ja;
1496   Mat_MPIAIJ        *b = NULL;
1497 
1498   PetscFunctionBegin;
1499   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1500   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1501 
1502   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1503   if (denseB) {
1504     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1505     mumps->id.ICNTL(20) = 0; /* dense RHS */
1506   } else {                   /* sparse B */
1507     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1508     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1509     if (flgT) { /* input B is transpose of actual RHS matrix,
1510                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1511       PetscCall(MatTransposeGetMat(B, &Bt));
1512     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1513     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1514   }
1515 
1516   PetscCall(MatGetSize(B, &M, &nrhs));
1517   mumps->id.nrhs = nrhs;
1518   mumps->id.lrhs = M;
1519   mumps->id.rhs  = NULL;
1520 
1521   if (mumps->petsc_size == 1) {
1522     PetscScalar *aa;
1523     PetscInt     spnr, *ia, *ja;
1524     PetscBool    second_solve = PETSC_FALSE;
1525 
1526     PetscCall(MatDenseGetArray(X, &array));
1527     mumps->id.rhs = (MumpsScalar *)array;
1528 
1529     if (denseB) {
1530       /* copy B to X */
1531       PetscCall(MatDenseGetArrayRead(B, &rbray));
1532       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1533       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1534     } else { /* sparse B */
1535       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1536       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1537       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1538       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1539       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1540     }
1541     /* handle condensation step of Schur complement (if any) */
1542     if (mumps->id.size_schur > 0) {
1543       PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1544       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1545         second_solve = PETSC_TRUE;
1546         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1547         mumps->id.ICNTL(26) = 1; /* condensation phase */
1548       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1549     }
1550     /* solve phase */
1551     mumps->id.job = JOB_SOLVE;
1552     PetscMUMPS_c(mumps);
1553     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1554 
1555     /* handle expansion step of Schur complement (if any) */
1556     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1557     else if (mumps->id.ICNTL(26) == 1) {
1558       PetscCall(MatMumpsSolveSchur_Private(A));
1559       for (j = 0; j < nrhs; ++j)
1560         for (i = 0; i < mumps->id.size_schur; ++i) {
1561 #if !defined(PETSC_USE_COMPLEX)
1562           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1563 #else
1564           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1565 #endif
1566           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1567         }
1568     }
1569     if (!denseB) { /* sparse B */
1570       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1571       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1572       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1573     }
1574     PetscCall(MatDenseRestoreArray(X, &array));
1575     PetscFunctionReturn(PETSC_SUCCESS);
1576   }
1577 
1578   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1579   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1580 
1581   /* create msol_loc to hold mumps local solution */
1582   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1583   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1584 
1585   lsol_loc  = mumps->id.lsol_loc;
1586   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1587   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1588   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1589   mumps->id.isol_loc = isol_loc;
1590 
1591   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1592 
1593   if (denseB) {
1594     if (mumps->ICNTL20 == 10) {
1595       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1596       PetscCall(MatDenseGetArrayRead(B, &rbray));
1597       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1598       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1599       PetscCall(MatGetLocalSize(B, &m, NULL));
1600       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1601     } else {
1602       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1603       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be done with a single
1604         MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero() to
1605         gather B onto rank 0 and then, on rank 0, re-arrange B into the desired order, which is a purely local operation.
1606       */
1607 
1608       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1609       /* wrap dense rhs matrix B into a vector v_mpi */
1610       PetscCall(MatGetLocalSize(B, &m, NULL));
1611       PetscCall(MatDenseGetArray(B, &bray));
1612       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1613       PetscCall(MatDenseRestoreArray(B, &bray));
1614 
1615       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1616       if (!mumps->myid) {
1617         PetscInt *idx;
1618         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1619         PetscCall(PetscMalloc1(nrhs * M, &idx));
1620         PetscCall(MatGetOwnershipRanges(B, &rstart));
1621         k = 0;
1622         for (proc = 0; proc < mumps->petsc_size; proc++) {
1623           for (j = 0; j < nrhs; j++) {
1624             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1625           }
1626         }
1627 
1628         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1629         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1630         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1631       } else {
1632         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1633         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1634         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1635       }
1636       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1637       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1638       PetscCall(ISDestroy(&is_to));
1639       PetscCall(ISDestroy(&is_from));
1640       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1641 
1642       if (!mumps->myid) { /* define rhs on the host */
1643         PetscCall(VecGetArray(b_seq, &bray));
1644         mumps->id.rhs = (MumpsScalar *)bray;
1645         PetscCall(VecRestoreArray(b_seq, &bray));
1646       }
1647     }
1648   } else { /* sparse B */
1649     b = (Mat_MPIAIJ *)Bt->data;
1650 
1651     /* wrap dense X into a vector v_mpi */
1652     PetscCall(MatGetLocalSize(X, &m, NULL));
1653     PetscCall(MatDenseGetArray(X, &bray));
1654     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1655     PetscCall(MatDenseRestoreArray(X, &bray));
1656 
1657     if (!mumps->myid) {
1658       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1659       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1660       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1661       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1662       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1663     } else {
1664       mumps->id.irhs_ptr    = NULL;
1665       mumps->id.irhs_sparse = NULL;
1666       mumps->id.nz_rhs      = 0;
1667       mumps->id.rhs_sparse  = NULL;
1668     }
1669   }
1670 
1671   /* solve phase */
1672   mumps->id.job = JOB_SOLVE;
1673   PetscMUMPS_c(mumps);
1674   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1675 
1676   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1677   PetscCall(MatDenseGetArray(X, &array));
1678   PetscCall(VecPlaceArray(v_mpi, array));
1679 
1680   /* create scatter scat_sol */
1681   PetscCall(MatGetOwnershipRanges(X, &rstart));
1682   /* iidx: index for scatter mumps solution to petsc X */
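  /* Worked example (a sketch): with 2 ranks, rows {0,1,2} on rank 0 and {3,4} on rank 1 (rstart = {0,3,5}) and
     nrhs = 2, v_mpi stores rank 0's rows column by column first, then rank 1's. The entry (row 4, column 1) then
     lives at index (4-3) + 3*2 + 1*2 = 9, which is exactly what the k/iidx/m computation below produces. */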
1683 
1684   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1685   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1686   for (i = 0; i < lsol_loc; i++) {
1687     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1688 
1689     for (proc = 0; proc < mumps->petsc_size; proc++) {
1690       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1691         myrstart = rstart[proc];
1692         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1693         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1694         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1695         break;
1696       }
1697     }
1698 
1699     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1700   }
1701   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1702   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1703   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1704   PetscCall(ISDestroy(&is_from));
1705   PetscCall(ISDestroy(&is_to));
1706   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1707   PetscCall(MatDenseRestoreArray(X, &array));
1708 
1709   /* free spaces */
1710   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1711   mumps->id.isol_loc = isol_loc_save;
1712 
1713   PetscCall(PetscFree2(sol_loc, isol_loc));
1714   PetscCall(PetscFree(idxx));
1715   PetscCall(VecDestroy(&msol_loc));
1716   PetscCall(VecDestroy(&v_mpi));
1717   if (!denseB) {
1718     if (!mumps->myid) {
1719       b = (Mat_MPIAIJ *)Bt->data;
1720       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1721       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1722       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1723     }
1724   } else {
1725     if (mumps->ICNTL20 == 0) {
1726       PetscCall(VecDestroy(&b_seq));
1727       PetscCall(VecScatterDestroy(&scat_rhs));
1728     }
1729   }
1730   PetscCall(VecScatterDestroy(&scat_sol));
1731   PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1732   PetscFunctionReturn(PETSC_SUCCESS);
1733 }
1734 
1735 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1736 {
1737   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1738   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1739 
1740   PetscFunctionBegin;
1741   mumps->id.ICNTL(9) = 0;
1742   PetscCall(MatMatSolve_MUMPS(A, B, X));
1743   mumps->id.ICNTL(9) = value;
1744   PetscFunctionReturn(PETSC_SUCCESS);
1745 }
1746 
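/* MUMPS wants a sparse RHS in compressed COLUMN storage, which PETSc matrices do not provide directly; the caller
   therefore passes Bt = B^T stored as AIJ, whose compressed rows are the compressed columns of B. Usage sketch:
   MatMatTransposeSolve(F, Bt, X) solves A X = Bt^T with the factored matrix F. */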
1747 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1748 {
1749   PetscBool flg;
1750   Mat       B;
1751 
1752   PetscFunctionBegin;
1753   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1754   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1755 
1756   /* Create B=Bt^T that uses Bt's data structure */
1757   PetscCall(MatCreateTranspose(Bt, &B));
1758 
1759   PetscCall(MatMatSolve_MUMPS(A, B, X));
1760   PetscCall(MatDestroy(&B));
1761   PetscFunctionReturn(PETSC_SUCCESS);
1762 }
1763 
1764 #if !defined(PETSC_USE_COMPLEX)
1765 /*
1766   input:
1767    F:        numeric factor
1768   output:
1769    nneg:     total number of negative pivots
1770    nzero:    total number of zero pivots
1771    npos:     (global dimension of F) - nneg - nzero
1772 */
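/* Usage note: this routine is typically reached through MatGetInertia() on a Cholesky factor. As the checks below
   require, in parallel the user must set -mat_mumps_icntl_13 1 (so the root node does not use ScaLAPACK), and
   counting zero pivots additionally requires null-pivot detection, -mat_mumps_icntl_24 1. */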
1773 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1774 {
1775   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1776   PetscMPIInt size;
1777 
1778   PetscFunctionBegin;
1779   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1780   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1781   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1782 
1783   if (nneg) *nneg = mumps->id.INFOG(12);
1784   if (nzero || npos) {
1785     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1786     if (nzero) *nzero = mumps->id.INFOG(28);
1787     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1788   }
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 #endif
1792 
1793 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1794 {
1795   PetscInt       i, nreqs;
1796   PetscMUMPSInt *irn, *jcn;
1797   PetscMPIInt    count;
1798   PetscInt64     totnnz, remain;
1799   const PetscInt osize = mumps->omp_comm_size;
1800   PetscScalar   *val;
1801 
1802   PetscFunctionBegin;
1803   if (osize > 1) {
1804     if (reuse == MAT_INITIAL_MATRIX) {
1805       /* master first gathers counts of nonzeros to receive */
1806       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1807       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1808 
1809       /* Then each computes number of send/recvs */
1810       if (mumps->is_omp_master) {
1811         /* Start from 1 since self communication is not done in MPI */
1812         nreqs = 0;
1813         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1814       } else {
1815         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1816       }
1817       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1818 
1819       /* The following code does a simple thing: the omp_master rank gathers irn/jcn/val from the others.
1820          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1821          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1822          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1823        */
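      /* A worked example (assuming PETSC_MPI_INT_MAX = 2147483647): a rank contributing 5,000,000,000 nonzeros is
         received in ceil(5e9 / 2147483647) = 3 slices of 2,147,483,647 + 2,147,483,647 + 705,032,706 entries,
         i.e. 3 x 3 = 9 nonblocking receives on the master (matched by 9 sends), since irn, jcn and val are sent
         separately. */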
1824       nreqs = 0; /* counter for actual send/recvs */
1825       if (mumps->is_omp_master) {
1826         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1827         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1828         PetscCall(PetscMalloc1(totnnz, &val));
1829 
1830         /* Self communication */
1831         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1832         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1833         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1834 
1835         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1836         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1837         PetscCall(PetscFree(mumps->val_alloc));
1838         mumps->nnz = totnnz;
1839         mumps->irn = irn;
1840         mumps->jcn = jcn;
1841         mumps->val = mumps->val_alloc = val;
1842 
1843         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1844         jcn += mumps->recvcount[0];
1845         val += mumps->recvcount[0];
1846 
1847         /* Remote communication */
1848         for (i = 1; i < osize; i++) {
1849           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1850           remain = mumps->recvcount[i] - count;
1851           while (count > 0) {
1852             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1853             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1854             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1855             irn += count;
1856             jcn += count;
1857             val += count;
1858             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1859             remain -= count;
1860           }
1861         }
1862       } else {
1863         irn    = mumps->irn;
1864         jcn    = mumps->jcn;
1865         val    = mumps->val;
1866         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1867         remain = mumps->nnz - count;
1868         while (count > 0) {
1869           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1870           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1871           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1872           irn += count;
1873           jcn += count;
1874           val += count;
1875           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1876           remain -= count;
1877         }
1878       }
1879     } else {
1880       nreqs = 0;
1881       if (mumps->is_omp_master) {
1882         val = mumps->val + mumps->recvcount[0];
1883         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1884           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1885           remain = mumps->recvcount[i] - count;
1886           while (count > 0) {
1887             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1888             val += count;
1889             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1890             remain -= count;
1891           }
1892         }
1893       } else {
1894         val    = mumps->val;
1895         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1896         remain = mumps->nnz - count;
1897         while (count > 0) {
1898           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1899           val += count;
1900           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1901           remain -= count;
1902         }
1903       }
1904     }
1905     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1906     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1907   }
1908   PetscFunctionReturn(PETSC_SUCCESS);
1909 }
1910 
1911 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1912 {
1913   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1914   PetscBool  isMPIAIJ;
1915 
1916   PetscFunctionBegin;
1917   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1918     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1919     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1920     PetscFunctionReturn(PETSC_SUCCESS);
1921   }
1922 
1923   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1924   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1925 
1926   /* numerical factorization phase */
1927   mumps->id.job = JOB_FACTNUMERIC;
1928   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1929     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1930   } else {
1931     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1932   }
1933   PetscMUMPS_c(mumps);
1934   if (mumps->id.INFOG(1) < 0) {
1935     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1936     if (mumps->id.INFOG(1) == -10) {
1937       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1938       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1939     } else if (mumps->id.INFOG(1) == -13) {
1940       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1941       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1942     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1943       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1944       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1945     } else {
1946       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1947       F->factorerrortype = MAT_FACTOR_OTHER;
1948     }
1949   }
1950   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
1951 
1952   F->assembled = PETSC_TRUE;
1953 
1954   if (F->schur) { /* reset Schur status to unfactored */
1955 #if defined(PETSC_HAVE_CUDA)
1956     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1957 #endif
1958     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1959       mumps->id.ICNTL(19) = 2;
1960       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1961     }
1962     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1963   }
1964 
1965   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1966   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1967 
1968   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1969   if (mumps->petsc_size > 1) {
1970     PetscInt     lsol_loc;
1971     PetscScalar *sol_loc;
1972 
1973     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1974 
1975     /* distributed solution; Create x_seq=sol_loc for repeated use */
1976     if (mumps->x_seq) {
1977       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1978       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1979       PetscCall(VecDestroy(&mumps->x_seq));
1980     }
1981     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1982     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1983     mumps->id.lsol_loc = lsol_loc;
1984     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1985     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1986   }
1987   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1988   PetscFunctionReturn(PETSC_SUCCESS);
1989 }
1990 
1991 /* Sets MUMPS options from the options database */
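/* For reference, a sketch of a typical options-database invocation exercising this routine (standard PETSc/MUMPS
   options registered below, not an exhaustive list):

     -pc_type lu -pc_factor_mat_solver_type mumps
     -mat_mumps_icntl_14 30 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
     -mat_mumps_use_omp_threads 4

   With -mat_mumps_use_omp_threads, the MPI ranks are grouped so that only the master rank of each group calls
   MUMPS while the cores of the remaining ranks serve as OpenMP threads; this requires PETSc to be configured with
   OpenMP support, as checked below. */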
1992 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1993 {
1994   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1995   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1996   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1997   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1998   MumpsScalar  *arr;
1999 
2000   PetscFunctionBegin;
2001   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2002   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2003     PetscInt nthreads   = 0;
2004     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2005     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2006 
2007     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2008     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2009     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2010 
2011     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2012     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2013     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2014     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2015     if (mumps->use_petsc_omp_support) {
2016       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2017                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2018       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2019 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2020       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2021       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2022 #endif
2023     } else {
2024       mumps->omp_comm      = PETSC_COMM_SELF;
2025       mumps->mumps_comm    = mumps->petsc_comm;
2026       mumps->is_omp_master = PETSC_TRUE;
2027     }
2028     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2029     mumps->reqs = NULL;
2030     mumps->tag  = 0;
2031 
2032     if (mumps->mumps_comm != MPI_COMM_NULL) {
2033       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2034         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2035         MPI_Comm comm;
2036         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2037         mumps->mumps_comm = comm;
2038       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2039     }
2040 
2041     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2042     mumps->id.job          = JOB_INIT;
2043     mumps->id.par          = 1; /* host participates in factorization and solve */
2044     mumps->id.sym          = mumps->sym;
2045 
2046     size          = mumps->id.size_schur;
2047     arr           = mumps->id.schur;
2048     listvar_schur = mumps->id.listvar_schur;
2049     PetscMUMPS_c(mumps);
2050     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2051     /* restore cached ICNTL and CNTL values */
2052     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2053     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2054     PetscCall(PetscFree(mumps->ICNTL_pre));
2055     PetscCall(PetscFree(mumps->CNTL_pre));
2056 
2057     if (schur) {
2058       mumps->id.size_schur    = size;
2059       mumps->id.schur_lld     = size;
2060       mumps->id.schur         = arr;
2061       mumps->id.listvar_schur = listvar_schur;
2062       if (mumps->petsc_size > 1) {
2063         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2064 
2065         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2066         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2067         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2068         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2069       } else {
2070         if (F->factortype == MAT_FACTOR_LU) {
2071           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2072         } else {
2073           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2074         }
2075       }
2076       mumps->id.ICNTL(26) = -1;
2077     }
2078 
2079     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2080        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2081      */
2082     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2083     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2084 
2085     mumps->scat_rhs = NULL;
2086     mumps->scat_sol = NULL;
2087 
2088     /* set PETSc-MUMPS default options - override MUMPS default */
2089     mumps->id.ICNTL(3) = 0;
2090     mumps->id.ICNTL(4) = 0;
2091     if (mumps->petsc_size == 1) {
2092       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2093       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2094     } else {
2095       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2096       mumps->id.ICNTL(21) = 1; /* distributed solution */
2097     }
2098   }
2099   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2100   if (flg) mumps->id.ICNTL(1) = icntl;
2101   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2102   if (flg) mumps->id.ICNTL(2) = icntl;
2103   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2104   if (flg) mumps->id.ICNTL(3) = icntl;
2105 
2106   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2107   if (flg) mumps->id.ICNTL(4) = icntl;
2108   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2109 
2110   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2111   if (flg) mumps->id.ICNTL(6) = icntl;
2112 
2113   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2114   if (flg) {
2115     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2116     mumps->id.ICNTL(7) = icntl;
2117   }
2118 
2119   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2120   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2121   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2122   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2123   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2124   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2125   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2126   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2127   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2128   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2129   if (flg) {
2130     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2131     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2132   }
2133   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2134   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
2135     PetscCall(MatDestroy(&F->schur));
2136     PetscCall(MatMumpsResetSchur_Private(mumps));
2137   }
2138 
2139   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2140      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2141      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2142      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2143      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2144      In short, we could not use distributed RHS with MPICH until v4.0b1.
2145    */
2146 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2147   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
2148 #else
2149   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
2150 #endif
2151   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2152   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2153 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2154   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2155 #endif
2156   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2157 
2158   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2159   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): maximum size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2160   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2161   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
2162 
2163   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2164   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2165   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2166   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2167   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
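  /* For example, parallel analysis with ParMetis would be requested with: -mat_mumps_icntl_28 2 -mat_mumps_icntl_29 2 */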
2168   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2169   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2170   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2171   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2172   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2173   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2174   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2175   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2176 
2177   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2178   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2179   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2180   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2181   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2182   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2183 
2184   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2185 
2186   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2187   if (ninfo) {
2188     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2189     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2190     mumps->ninfo = ninfo;
2191     for (i = 0; i < ninfo; i++) {
2192       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2193       mumps->info[i] = info[i];
2194     }
2195   }
2196   PetscOptionsEnd();
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2201 {
2202   PetscFunctionBegin;
2203   if (mumps->id.INFOG(1) < 0) {
2204     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2205     if (mumps->id.INFOG(1) == -6) {
2206       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2207       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2208     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2209       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2210       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2211     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
2212       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2213     } else {
2214       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2215       F->factorerrortype = MAT_FACTOR_OTHER;
2216     }
2217   }
2218   PetscFunctionReturn(PETSC_SUCCESS);
2219 }
2220 
2221 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2222 {
2223   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2224   Vec            b;
2225   const PetscInt M = A->rmap->N;
2226 
2227   PetscFunctionBegin;
2228   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2229     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2230     PetscFunctionReturn(PETSC_SUCCESS);
2231   }
2232 
2233   /* Set MUMPS options from the options database */
2234   PetscCall(MatSetFromOptions_MUMPS(F, A));
2235 
2236   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2237   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2238 
2239   /* analysis phase */
2240   mumps->id.job = JOB_FACTSYMBOLIC;
2241   mumps->id.n   = M;
2242   switch (mumps->id.ICNTL(18)) {
2243   case 0: /* centralized assembled matrix input */
2244     if (!mumps->myid) {
2245       mumps->id.nnz = mumps->nnz;
2246       mumps->id.irn = mumps->irn;
2247       mumps->id.jcn = mumps->jcn;
2248       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2249       if (r) {
2250         mumps->id.ICNTL(7) = 1;
2251         if (!mumps->myid) {
2252           const PetscInt *idx;
2253           PetscInt        i;
2254 
2255           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2256           PetscCall(ISGetIndices(r, &idx));
2257           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2258           PetscCall(ISRestoreIndices(r, &idx));
2259         }
2260       }
2261     }
2262     break;
2263   case 3: /* distributed assembled matrix input (size>1) */
2264     mumps->id.nnz_loc = mumps->nnz;
2265     mumps->id.irn_loc = mumps->irn;
2266     mumps->id.jcn_loc = mumps->jcn;
2267     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2268     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2269       PetscCall(MatCreateVecs(A, NULL, &b));
2270       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2271       PetscCall(VecDestroy(&b));
2272     }
2273     break;
2274   }
2275   PetscMUMPS_c(mumps);
2276   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2277 
2278   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2279   F->ops->solve             = MatSolve_MUMPS;
2280   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2281   F->ops->matsolve          = MatMatSolve_MUMPS;
2282   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2283   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2284 
2285   mumps->matstruc = SAME_NONZERO_PATTERN;
2286   PetscFunctionReturn(PETSC_SUCCESS);
2287 }
2288 
2289 /* Note the Petsc r and c permutations are ignored */
2290 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2291 {
2292   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2293   Vec            b;
2294   const PetscInt M = A->rmap->N;
2295 
2296   PetscFunctionBegin;
2297   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2298     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2299     PetscFunctionReturn(PETSC_SUCCESS);
2300   }
2301 
2302   /* Set MUMPS options from the options database */
2303   PetscCall(MatSetFromOptions_MUMPS(F, A));
2304 
2305   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2306   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2307 
2308   /* analysis phase */
2309   mumps->id.job = JOB_FACTSYMBOLIC;
2310   mumps->id.n   = M;
2311   switch (mumps->id.ICNTL(18)) {
2312   case 0: /* centralized assembled matrix input */
2313     if (!mumps->myid) {
2314       mumps->id.nnz = mumps->nnz;
2315       mumps->id.irn = mumps->irn;
2316       mumps->id.jcn = mumps->jcn;
2317       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2318     }
2319     break;
2320   case 3: /* distributed assembled matrix input (size>1) */
2321     mumps->id.nnz_loc = mumps->nnz;
2322     mumps->id.irn_loc = mumps->irn;
2323     mumps->id.jcn_loc = mumps->jcn;
2324     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2325     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2326       PetscCall(MatCreateVecs(A, NULL, &b));
2327       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2328       PetscCall(VecDestroy(&b));
2329     }
2330     break;
2331   }
2332   PetscMUMPS_c(mumps);
2333   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2334 
2335   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2336   F->ops->solve             = MatSolve_MUMPS;
2337   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2338   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2339 
2340   mumps->matstruc = SAME_NONZERO_PATTERN;
2341   PetscFunctionReturn(PETSC_SUCCESS);
2342 }
2343 
2344 /* Note the PETSc r permutation and factor info are ignored */
2345 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2346 {
2347   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2348   Vec            b;
2349   const PetscInt M = A->rmap->N;
2350 
2351   PetscFunctionBegin;
2352   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2353     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2354     PetscFunctionReturn(PETSC_SUCCESS);
2355   }
2356 
2357   /* Set MUMPS options from the options database */
2358   PetscCall(MatSetFromOptions_MUMPS(F, A));
2359 
2360   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2361   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2362 
2363   /* analysis phase */
2364   mumps->id.job = JOB_FACTSYMBOLIC;
2365   mumps->id.n   = M;
2366   switch (mumps->id.ICNTL(18)) {
2367   case 0: /* centralized assembled matrix input */
2368     if (!mumps->myid) {
2369       mumps->id.nnz = mumps->nnz;
2370       mumps->id.irn = mumps->irn;
2371       mumps->id.jcn = mumps->jcn;
2372       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2373     }
2374     break;
2375   case 3: /* distributed assembled matrix input (size>1) */
2376     mumps->id.nnz_loc = mumps->nnz;
2377     mumps->id.irn_loc = mumps->irn;
2378     mumps->id.jcn_loc = mumps->jcn;
2379     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2380     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2381       PetscCall(MatCreateVecs(A, NULL, &b));
2382       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2383       PetscCall(VecDestroy(&b));
2384     }
2385     break;
2386   }
2387   PetscMUMPS_c(mumps);
2388   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2389 
2390   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2391   F->ops->solve                 = MatSolve_MUMPS;
2392   F->ops->solvetranspose        = MatSolve_MUMPS;
2393   F->ops->matsolve              = MatMatSolve_MUMPS;
2394   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2395   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2396 #if defined(PETSC_USE_COMPLEX)
2397   F->ops->getinertia = NULL;
2398 #else
2399   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2400 #endif
2401 
2402   mumps->matstruc = SAME_NONZERO_PATTERN;
2403   PetscFunctionReturn(PETSC_SUCCESS);
2404 }
2405 
2406 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2407 {
2408   PetscBool         iascii;
2409   PetscViewerFormat format;
2410   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2411 
2412   PetscFunctionBegin;
2413   /* check if matrix is mumps type */
2414   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2415 
2416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2417   if (iascii) {
2418     PetscCall(PetscViewerGetFormat(viewer, &format));
2419     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2420       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2421       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2422         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2423         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2424         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2425         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2426         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2427         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2428         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2429         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2430         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2431         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2432         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2433         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2434         if (mumps->id.ICNTL(11) > 0) {
2435           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2436           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2437           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2438           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2439           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2440           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2441         }
2442         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2443         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2444         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2445         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2446         /* ICNTL(16-17) not used */
2447         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2448         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2449         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2450         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2451         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2452         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2453 
2454         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2455         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2456         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2457         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2458         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2459         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2460 
2461         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2462         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors that may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
2463         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2464         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2465         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2466         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2467         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2468 
2469         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2470         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2471         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2472         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2473         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2474         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2475 
2476         /* information local to each processor */
2477         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2478         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2479         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2480         PetscCall(PetscViewerFlush(viewer));
2481         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2482         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2483         PetscCall(PetscViewerFlush(viewer));
2484         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2485         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2486         PetscCall(PetscViewerFlush(viewer));
2487 
2488         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2489         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2490         PetscCall(PetscViewerFlush(viewer));
2491 
2492         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2493         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2494         PetscCall(PetscViewerFlush(viewer));
2495 
2496         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2497         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2498         PetscCall(PetscViewerFlush(viewer));
2499 
2500         if (mumps->ninfo && mumps->ninfo <= 80) {
2501           PetscInt i;
2502           for (i = 0; i < mumps->ninfo; i++) {
2503             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2504             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2505             PetscCall(PetscViewerFlush(viewer));
2506           }
2507         }
2508         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2509       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2510 
2511       if (mumps->myid == 0) { /* information from the host */
2512         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2513         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2514         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2515         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2516 
2517         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2518         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2519         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2520         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2521         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2522         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2523         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2524         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2525         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2526         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2527         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2528         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2530         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2533         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2534         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2535         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2537         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2540         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2541         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2543         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2551       }
2552     }
2553   }
2554   PetscFunctionReturn(PETSC_SUCCESS);
2555 }
2556 
2557 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2558 {
2559   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2560 
2561   PetscFunctionBegin;
2562   info->block_size        = 1.0;
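  /* MUMPS reports a negative INFOG(20) when the estimated number of factor entries is given in millions, hence the -1000000 scaling below */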
2563   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2564   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2565   info->nz_unneeded       = 0.0;
2566   info->assemblies        = 0.0;
2567   info->mallocs           = 0.0;
2568   info->memory            = 0.0;
2569   info->fill_ratio_given  = 0;
2570   info->fill_ratio_needed = 0;
2571   info->factor_mallocs    = 0;
2572   PetscFunctionReturn(PETSC_SUCCESS);
2573 }
2574 
2575 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2576 {
2577   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2578   const PetscScalar *arr;
2579   const PetscInt    *idxs;
2580   PetscInt           size, i;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(ISGetLocalSize(is, &size));
2584   /* Schur complement matrix */
2585   PetscCall(MatDestroy(&F->schur));
2586   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2587   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2588   mumps->id.schur      = (MumpsScalar *)arr;
2589   mumps->id.size_schur = size;
2590   mumps->id.schur_lld  = size;
2591   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2592   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2593 
2594   /* MUMPS expects Fortran style indices */
2595   PetscCall(PetscFree(mumps->id.listvar_schur));
2596   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2597   PetscCall(ISGetIndices(is, &idxs));
2598   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2599   PetscCall(ISRestoreIndices(is, &idxs));
2600   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2601   mumps->id.ICNTL(26) = -1;
2602   PetscFunctionReturn(PETSC_SUCCESS);
2603 }
2604 
2605 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2606 {
2607   Mat          St;
2608   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2609   PetscScalar *array;
2610 
2611   PetscFunctionBegin;
2612   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2613   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2614   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2615   PetscCall(MatSetType(St, MATDENSE));
2616   PetscCall(MatSetUp(St));
2617   PetscCall(MatDenseGetArray(St, &array));
2618   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2619     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2620       PetscInt i, j, N = mumps->id.size_schur;
2621       for (i = 0; i < N; i++) {
2622         for (j = 0; j < N; j++) {
2623 #if !defined(PETSC_USE_COMPLEX)
2624           PetscScalar val = mumps->id.schur[i * N + j];
2625 #else
2626           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2627 #endif
2628           array[j * N + i] = val;
2629         }
2630       }
2631     } else { /* stored by columns */
2632       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2633     }
2634   } else {                          /* either full or lower-triangular (not packed) */
2635     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2636       PetscInt i, j, N = mumps->id.size_schur;
2637       for (i = 0; i < N; i++) {
2638         for (j = i; j < N; j++) {
2639 #if !defined(PETSC_USE_COMPLEX)
2640           PetscScalar val = mumps->id.schur[i * N + j];
2641 #else
2642           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2643 #endif
2644           array[i * N + j] = array[j * N + i] = val;
2645         }
2646       }
2647     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2648       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2649     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2650       PetscInt i, j, N = mumps->id.size_schur;
2651       for (i = 0; i < N; i++) {
2652         for (j = 0; j < i + 1; j++) {
2653 #if !defined(PETSC_USE_COMPLEX)
2654           PetscScalar val = mumps->id.schur[i * N + j];
2655 #else
2656           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2657 #endif
2658           array[i * N + j] = array[j * N + i] = val;
2659         }
2660       }
2661     }
2662   }
2663   PetscCall(MatDenseRestoreArray(St, &array));
2664   *S = St;
2665   PetscFunctionReturn(PETSC_SUCCESS);
2666 }
2667 
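/* ICNTL values set before the first call to PetscMUMPS_c() (i.e., while mumps->id.job is still JOB_NULL) are cached in
   mumps->ICNTL_pre with the layout [count, icntl_1, ival_1, icntl_2, ival_2, ...] so they can be applied once MUMPS has been initialized */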
2668 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2669 {
2670   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2671 
2672   PetscFunctionBegin;
2673   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2674     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2675     for (i = 0; i < nICNTL_pre; ++i)
2676       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2677     if (i == nICNTL_pre) {                             /* not already cached */
2678       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2679       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2680       mumps->ICNTL_pre[0]++;
2681     }
2682     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2683     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2684   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2685   PetscFunctionReturn(PETSC_SUCCESS);
2686 }
2687 
2688 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2689 {
2690   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2691 
2692   PetscFunctionBegin;
2693   if (mumps->id.job == JOB_NULL) {
2694     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2695     *ival = 0;
2696     for (i = 0; i < nICNTL_pre; ++i) {
2697       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2698     }
2699   } else *ival = mumps->id.ICNTL(icntl);
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 /*@
2704   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2705 
2706   Logically Collective
2707 
2708   Input Parameters:
2709 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2710 . icntl - index of MUMPS parameter array ICNTL()
2711 - ival  - value of MUMPS ICNTL(icntl)
2712 
2713   Options Database Key:
2714 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2715 
2716   Level: beginner
2717 
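  Example Usage:
  A minimal sketch, not taken from the PETSc examples, of setting an ICNTL value on a MUMPS LU factor; `A` is an assumed
  assembled matrix and `PetscCall()` error checking is omitted for brevity
.vb
  Mat           F;
  IS            rperm, cperm;
  MatFactorInfo info;

  MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
  MatMumpsSetIcntl(F, 14, 30); /* allow a 30 percent increase of the estimated working space */
  MatGetOrdering(A, MATORDERINGNATURAL, &rperm, &cperm);
  MatFactorInfoInitialize(&info);
  MatLUFactorSymbolic(F, A, rperm, cperm, &info);
  MatLUFactorNumeric(F, A, &info);
  ISDestroy(&rperm); ISDestroy(&cperm);
.ve
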
2718   References:
2719 .  * - MUMPS Users' Guide
2720 
2721 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2722 @*/
2723 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2724 {
2725   PetscFunctionBegin;
2726   PetscValidType(F, 1);
2727   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2728   PetscValidLogicalCollectiveInt(F, icntl, 2);
2729   PetscValidLogicalCollectiveInt(F, ival, 3);
2730   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2731   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2732   PetscFunctionReturn(PETSC_SUCCESS);
2733 }
2734 
2735 /*@
2736   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2737 
2738   Logically Collective
2739 
2740   Input Parameters:
2741 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2742 - icntl - index of MUMPS parameter array ICNTL()
2743 
2744   Output Parameter:
2745 . ival - value of MUMPS ICNTL(icntl)
2746 
2747   Level: beginner
2748 
2749   References:
2750 .  * - MUMPS Users' Guide
2751 
2752 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2753 @*/
2754 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2755 {
2756   PetscFunctionBegin;
2757   PetscValidType(F, 1);
2758   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2759   PetscValidLogicalCollectiveInt(F, icntl, 2);
2760   PetscAssertPointer(ival, 3);
2761   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2762   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2763   PetscFunctionReturn(PETSC_SUCCESS);
2764 }
2765 
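/* CNTL values set before MUMPS is initialized are cached in mumps->CNTL_pre, using the same [count, index, value, ...] layout as ICNTL_pre above */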
2766 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2767 {
2768   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2769 
2770   PetscFunctionBegin;
2771   if (mumps->id.job == JOB_NULL) {
2772     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2773     for (i = 0; i < nCNTL_pre; ++i)
2774       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2775     if (i == nCNTL_pre) {
2776       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2777       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2778       mumps->CNTL_pre[0]++;
2779     }
2780     mumps->CNTL_pre[1 + 2 * i] = icntl;
2781     mumps->CNTL_pre[2 + 2 * i] = val;
2782   } else mumps->id.CNTL(icntl) = val;
2783   PetscFunctionReturn(PETSC_SUCCESS);
2784 }
2785 
2786 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2787 {
2788   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2789 
2790   PetscFunctionBegin;
2791   if (mumps->id.job == JOB_NULL) {
2792     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2793     *val = 0.0;
2794     for (i = 0; i < nCNTL_pre; ++i) {
2795       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2796     }
2797   } else *val = mumps->id.CNTL(icntl);
2798   PetscFunctionReturn(PETSC_SUCCESS);
2799 }
2800 
2801 /*@
2802   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2803 
2804   Logically Collective
2805 
2806   Input Parameters:
2807 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2808 . icntl - index of MUMPS parameter array CNTL()
2809 - val   - value of MUMPS CNTL(icntl)
2810 
2811   Options Database Key:
2812 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
2813 
2814   Level: beginner
2815 
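  Example Usage:
  A minimal sketch (error checking omitted); `F` is an assumed MUMPS factor obtained with `MatGetFactor()`
.vb
  MatMumpsSetCntl(F, 1, 0.1);    /* relative pivoting threshold */
  MatMumpsSetCntl(F, 3, 1.e-12); /* absolute pivoting threshold */
.ve
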
2816   References:
2817 .  * - MUMPS Users' Guide
2818 
2819 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2820 @*/
2821 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2822 {
2823   PetscFunctionBegin;
2824   PetscValidType(F, 1);
2825   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2826   PetscValidLogicalCollectiveInt(F, icntl, 2);
2827   PetscValidLogicalCollectiveReal(F, val, 3);
2828   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2829   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2830   PetscFunctionReturn(PETSC_SUCCESS);
2831 }
2832 
2833 /*@
2834   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2835 
2836   Logically Collective
2837 
2838   Input Parameters:
2839 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2840 - icntl - index of MUMPS parameter array CNTL()
2841 
2842   Output Parameter:
2843 . val - value of MUMPS CNTL(icntl)
2844 
2845   Level: beginner
2846 
2847   References:
2848 .  * - MUMPS Users' Guide
2849 
2850 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2851 @*/
2852 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2853 {
2854   PetscFunctionBegin;
2855   PetscValidType(F, 1);
2856   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2857   PetscValidLogicalCollectiveInt(F, icntl, 2);
2858   PetscAssertPointer(val, 3);
2859   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2860   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2861   PetscFunctionReturn(PETSC_SUCCESS);
2862 }
2863 
2864 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2865 {
2866   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2867 
2868   PetscFunctionBegin;
2869   *info = mumps->id.INFO(icntl);
2870   PetscFunctionReturn(PETSC_SUCCESS);
2871 }
2872 
2873 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2874 {
2875   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2876 
2877   PetscFunctionBegin;
2878   *infog = mumps->id.INFOG(icntl);
2879   PetscFunctionReturn(PETSC_SUCCESS);
2880 }
2881 
2882 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2883 {
2884   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2885 
2886   PetscFunctionBegin;
2887   *rinfo = mumps->id.RINFO(icntl);
2888   PetscFunctionReturn(PETSC_SUCCESS);
2889 }
2890 
2891 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2892 {
2893   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2894 
2895   PetscFunctionBegin;
2896   *rinfog = mumps->id.RINFOG(icntl);
2897   PetscFunctionReturn(PETSC_SUCCESS);
2898 }
2899 
2900 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2901 {
2902   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2903 
2904   PetscFunctionBegin;
2905   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2906   *size  = 0;
2907   *array = NULL;
2908   if (!mumps->myid) {
2909     *size = mumps->id.INFOG(28);
2910     PetscCall(PetscMalloc1(*size, array));
2911     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2912   }
2913   PetscFunctionReturn(PETSC_SUCCESS);
2914 }
2915 
2916 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2917 {
2918   Mat          Bt = NULL, Btseq = NULL;
2919   PetscBool    flg;
2920   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2921   PetscScalar *aa;
2922   PetscInt     spnr, *ia, *ja, M, nrhs;
2923 
2924   PetscFunctionBegin;
2925   PetscAssertPointer(spRHS, 2);
2926   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2927   if (flg) {
2928     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2929   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2930 
2931   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2932 
2933   if (mumps->petsc_size > 1) {
2934     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2935     Btseq         = b->A;
2936   } else {
2937     Btseq = Bt;
2938   }
2939 
2940   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2941   mumps->id.nrhs = nrhs;
2942   mumps->id.lrhs = M;
2943   mumps->id.rhs  = NULL;
2944 
2945   if (!mumps->myid) {
2946     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2947     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2948     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2949     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2950     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2951   } else {
2952     mumps->id.irhs_ptr    = NULL;
2953     mumps->id.irhs_sparse = NULL;
2954     mumps->id.nz_rhs      = 0;
2955     mumps->id.rhs_sparse  = NULL;
2956   }
2957   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2958   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2959 
2960   /* solve phase */
2961   mumps->id.job = JOB_SOLVE;
2962   PetscMUMPS_c(mumps);
2963   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2964 
2965   if (!mumps->myid) {
2966     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2967     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2968     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2969   }
2970   PetscFunctionReturn(PETSC_SUCCESS);
2971 }
2972 
2973 /*@
2974   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2975 
2976   Logically Collective
2977 
2978   Input Parameter:
2979 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2980 
2981   Output Parameter:
2982 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2983 
2984   Level: beginner
2985 
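  Example Usage:
  A sketch (error checking omitted); `Bt` is an assumed sequential `MATAIJ` matrix that holds, in transposed form, the sparsity
  pattern of the requested entries of inv(A); MUMPS overwrites its numerical values with the computed entries
.vb
  Mat spRHS;

  MatCreateTranspose(Bt, &spRHS);
  MatMumpsGetInverse(F, spRHS);
  MatDestroy(&spRHS);
.ve
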
2986   References:
2987 .  * - MUMPS Users' Guide
2988 
2989 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2990 @*/
2991 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2992 {
2993   PetscFunctionBegin;
2994   PetscValidType(F, 1);
2995   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2996   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2997   PetscFunctionReturn(PETSC_SUCCESS);
2998 }
2999 
3000 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3001 {
3002   Mat spRHS;
3003 
3004   PetscFunctionBegin;
3005   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3006   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3007   PetscCall(MatDestroy(&spRHS));
3008   PetscFunctionReturn(PETSC_SUCCESS);
3009 }
3010 
3011 /*@
3012   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
3013 
3014   Logically Collective
3015 
3016   Input Parameter:
3017 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3018 
3019   Output Parameter:
3020 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3021 
3022   Level: beginner
3023 
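  Example Usage:
  A sketch (error checking omitted); `spRHST` is an assumed sequential `MATAIJ` matrix whose nonzero pattern selects the
  requested entries of inv(A^T); its numerical values are overwritten with the computed entries
.vb
  MatMumpsGetInverseTranspose(F, spRHST);
.ve
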
3024   References:
3025 .  * - MUMPS Users' Guide
3026 
3027 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3028 @*/
3029 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3030 {
3031   PetscBool flg;
3032 
3033   PetscFunctionBegin;
3034   PetscValidType(F, 1);
3035   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3036   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3037   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3038 
3039   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3040   PetscFunctionReturn(PETSC_SUCCESS);
3041 }
3042 
3043 /*@
3044   MatMumpsGetInfo - Get MUMPS parameter INFO()
3045 
3046   Logically Collective
3047 
3048   Input Parameters:
3049 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3050 - icntl - index of MUMPS parameter array INFO()
3051 
3052   Output Parameter:
3053 . ival - value of MUMPS INFO(icntl)
3054 
3055   Level: beginner
3056 
3057   References:
3058 .  * - MUMPS Users' Guide
3059 
3060 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3061 @*/
3062 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3063 {
3064   PetscFunctionBegin;
3065   PetscValidType(F, 1);
3066   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3067   PetscAssertPointer(ival, 3);
3068   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3069   PetscFunctionReturn(PETSC_SUCCESS);
3070 }
3071 
3072 /*@
3073   MatMumpsGetInfog - Get MUMPS parameter INFOG()
3074 
3075   Logically Collective
3076 
3077   Input Parameters:
3078 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3079 - icntl - index of MUMPS parameter array INFOG()
3080 
3081   Output Parameter:
3082 . ival - value of MUMPS INFOG(icntl)
3083 
3084   Level: beginner
3085 
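  Example Usage:
  A minimal sketch (error checking omitted) of inspecting the global MUMPS return code after factoring with `F`
.vb
  PetscInt infog1;

  MatMumpsGetInfog(F, 1, &infog1);
  if (infog1 < 0) {
    /* the factorization or solve failed; infog1 is the MUMPS error code */
  }
.ve
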
3086   References:
3087 .  * - MUMPS Users' Guide
3088 
3089 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3090 @*/
3091 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3092 {
3093   PetscFunctionBegin;
3094   PetscValidType(F, 1);
3095   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3096   PetscAssertPointer(ival, 3);
3097   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3098   PetscFunctionReturn(PETSC_SUCCESS);
3099 }
3100 
3101 /*@
3102   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
3103 
3104   Logically Collective
3105 
3106   Input Parameters:
3107 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3108 - icntl - index of MUMPS parameter array RINFO()
3109 
3110   Output Parameter:
3111 . val - value of MUMPS RINFO(icntl)
3112 
3113   Level: beginner
3114 
3115   References:
3116 .  * - MUMPS Users' Guide
3117 
3118 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3119 @*/
3120 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3121 {
3122   PetscFunctionBegin;
3123   PetscValidType(F, 1);
3124   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3125   PetscAssertPointer(val, 3);
3126   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3127   PetscFunctionReturn(PETSC_SUCCESS);
3128 }
3129 
3130 /*@
3131   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
3132 
3133   Logically Collective
3134 
3135   Input Parameters:
3136 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3137 - icntl - index of MUMPS parameter array RINFOG()
3138 
3139   Output Parameter:
3140 . val - value of MUMPS RINFOG(icntl)
3141 
3142   Level: beginner
3143 
3144   References:
3145 .  * - MUMPS Users' Guide
3146 
3147 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3148 @*/
3149 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3150 {
3151   PetscFunctionBegin;
3152   PetscValidType(F, 1);
3153   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3154   PetscAssertPointer(val, 3);
3155   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3156   PetscFunctionReturn(PETSC_SUCCESS);
3157 }
3158 
3159 /*@
3160   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
3161 
3162   Logically Collective
3163 
3164   Input Parameter:
3165 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3166 
3167   Output Parameters:
3168 + size  - local size of the array. The size of the array is non-zero only on the host.
3169 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3170 - array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
3171            for freeing it with `PetscFree()`.
3172   Level: beginner
3173 
3174   References:
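  Example Usage:
  A sketch (error checking omitted); ICNTL(24) must be set to 1 before the numerical factorization for null pivots to be detected
.vb
  PetscInt nnull, *rows;

  MatMumpsSetIcntl(F, 24, 1);              /* enable null pivot row detection before factoring */
  /* ... numerical factorization of F ... */
  MatMumpsGetNullPivots(F, &nnull, &rows); /* rows is allocated only on the host rank */
  /* ... use rows[0..nnull-1] ... */
  PetscFree(rows);
.ve
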
3175 .  * - MUMPS Users' Guide
3176 
3177 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3178 @*/
3179 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3180 {
3181   PetscFunctionBegin;
3182   PetscValidType(F, 1);
3183   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3184   PetscAssertPointer(size, 2);
3185   PetscAssertPointer(array, 3);
3186   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3187   PetscFunctionReturn(PETSC_SUCCESS);
3188 }
3189 
3190 /*MC
3191   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3192   distributed and sequential matrices via the external package MUMPS.
3193 
3194   Works with `MATAIJ` and `MATSBAIJ` matrices
3195 
3196   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3197 
3198   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of your code runs in flat-MPI mode.
3199   See details below.
3200 
3201   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
3202 
3203   Options Database Keys:
3204 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3205 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3206 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
3207 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
3208 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)
3209 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3210                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3211 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3212 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3213 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3214 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3215 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3216 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3217 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3218 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3219 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3220 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3221 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
3222 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3223 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3224 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3225 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3226 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3227 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3228 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3229 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3230 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3231 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3232 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3233 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3234 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
3235 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
3236 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3237 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3238 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3239 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3240 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3241                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3242 
3243   Level: beginner
3244 
3245     Notes:
3246     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3247     error if the matrix is Hermitian.
3248 
3249     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3250     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3251 
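    A sketch (error checking omitted) of selecting this solver from the source code instead of the options database
.vb
          KSPGetPC(ksp,&pc);
          PCSetType(pc,PCLU);
          PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
.ve
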
3252     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3253     the failure with
3254 .vb
3255           KSPGetPC(ksp,&pc);
3256           PCFactorGetMatrix(pc,&mat);
3257           MatMumpsGetInfo(mat,....);
3258           MatMumpsGetInfog(mat,....); etc.
3259 .ve
3260     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3261 
3262     MUMPS provides 64-bit integer support in two build modes:
3263       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3264       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3265 
3266       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3267       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3268       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3269       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3270 
3271     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3272 
3273   Two modes to run MUMPS/PETSc with OpenMP
3274 .vb
3275      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3276      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3277 .ve
3278 
3279 .vb
3280      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3281     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3282 .ve
3283 
3284    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3285    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3286    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3287    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3288    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3289 
3290    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3291    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3292    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3293    are called slave ranks (or slaves). Only master ranks are seen by MUMPS and slaves are not. We free the CPUs assigned to slaves (which might be set
3294    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3295    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
3296    If you interleave MPI ranks across the two sockets (even ranks placed on socket 0 and odd ranks on socket 1) and bind
3297    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of the
3298    cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3299    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3300    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3301    examine the mapping result.
3302 
3303    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3304    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3305    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3306 
3307    References:
3308 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3309 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3310 
3311 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3312 M*/
3313 
3314 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3315 {
3316   PetscFunctionBegin;
3317   *type = MATSOLVERMUMPS;
3318   PetscFunctionReturn(PETSC_SUCCESS);
3319 }
3320 
3321 /* MatGetFactor for Seq and MPI AIJ matrices */
3322 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3323 {
3324   Mat         B;
3325   Mat_MUMPS  *mumps;
3326   PetscBool   isSeqAIJ;
3327   PetscMPIInt size;
3328 
3329   PetscFunctionBegin;
3330 #if defined(PETSC_USE_COMPLEX)
3331   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3332     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3333     *F = NULL;
3334     PetscFunctionReturn(PETSC_SUCCESS);
3335   }
3336 #endif
3337   /* Create the factorization matrix */
3338   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3339   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3340   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3341   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3342   PetscCall(MatSetUp(B));
3343 
3344   PetscCall(PetscNew(&mumps));
3345 
3346   B->ops->view    = MatView_MUMPS;
3347   B->ops->getinfo = MatGetInfo_MUMPS;
3348 
3349   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3350   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3351   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3352   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3353   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3354   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3355   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3356   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3357   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3358   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3359   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3360   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3361   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3362   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3363 
3364   if (ftype == MAT_FACTOR_LU) {
3365     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3366     B->factortype            = MAT_FACTOR_LU;
3367     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3368     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3369     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3370     mumps->sym = 0;
3371   } else {
3372     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3373     B->factortype                  = MAT_FACTOR_CHOLESKY;
3374     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3375     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3376     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
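    /* MUMPS SYM parameter: 0 = unsymmetric, 1 = symmetric positive definite, 2 = general symmetric; the SPD setting is not used with complex scalars */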
3377 #if defined(PETSC_USE_COMPLEX)
3378     mumps->sym = 2;
3379 #else
3380     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3381     else mumps->sym = 2;
3382 #endif
3383   }
3384 
3385   /* set solvertype */
3386   PetscCall(PetscFree(B->solvertype));
3387   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3388   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3389   if (size == 1) {
3390     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3391     B->canuseordering = PETSC_TRUE;
3392   }
3393   B->ops->destroy = MatDestroy_MUMPS;
3394   B->data         = (void *)mumps;
3395 
3396   *F               = B;
3397   mumps->id.job    = JOB_NULL;
3398   mumps->ICNTL_pre = NULL;
3399   mumps->CNTL_pre  = NULL;
3400   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3401   PetscFunctionReturn(PETSC_SUCCESS);
3402 }
3403 
3404 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3405 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3406 {
3407   Mat         B;
3408   Mat_MUMPS  *mumps;
3409   PetscBool   isSeqSBAIJ;
3410   PetscMPIInt size;
3411 
3412   PetscFunctionBegin;
3413 #if defined(PETSC_USE_COMPLEX)
3414   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3415     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3416     *F = NULL;
3417     PetscFunctionReturn(PETSC_SUCCESS);
3418   }
3419 #endif
3420   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3421   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3422   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3423   PetscCall(MatSetUp(B));
3424 
3425   PetscCall(PetscNew(&mumps));
3426   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3427   if (isSeqSBAIJ) {
3428     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3429   } else {
3430     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3431   }
3432 
3433   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3434   B->ops->view                   = MatView_MUMPS;
3435   B->ops->getinfo                = MatGetInfo_MUMPS;
3436 
3437   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3438   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3439   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3440   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3441   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3442   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3443   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3444   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3445   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3446   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3447   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3448   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3449   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3450   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3451 
3452   B->factortype = MAT_FACTOR_CHOLESKY;
3453 #if defined(PETSC_USE_COMPLEX)
3454   mumps->sym = 2;
3455 #else
3456   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3457   else mumps->sym = 2;
3458 #endif
3459 
3460   /* set solvertype */
3461   PetscCall(PetscFree(B->solvertype));
3462   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3463   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3464   if (size == 1) {
3465     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3466     B->canuseordering = PETSC_TRUE;
3467   }
3468   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3469   B->ops->destroy = MatDestroy_MUMPS;
3470   B->data         = (void *)mumps;
3471 
3472   *F               = B;
3473   mumps->id.job    = JOB_NULL;
3474   mumps->ICNTL_pre = NULL;
3475   mumps->CNTL_pre  = NULL;
3476   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3477   PetscFunctionReturn(PETSC_SUCCESS);
3478 }
3479 
3480 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3481 {
3482   Mat         B;
3483   Mat_MUMPS  *mumps;
3484   PetscBool   isSeqBAIJ;
3485   PetscMPIInt size;
3486 
3487   PetscFunctionBegin;
3488   /* Create the factorization matrix */
3489   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3490   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3491   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3492   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3493   PetscCall(MatSetUp(B));
3494 
3495   PetscCall(PetscNew(&mumps));
3496   if (ftype == MAT_FACTOR_LU) {
3497     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3498     B->factortype            = MAT_FACTOR_LU;
3499     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3500     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3501     mumps->sym = 0;
3502     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3503   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3504 
3505   B->ops->view    = MatView_MUMPS;
3506   B->ops->getinfo = MatGetInfo_MUMPS;
3507 
3508   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3509   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3510   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3511   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3512   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3513   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3514   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3515   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3516   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3517   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3518   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3519   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3520   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3521   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3522 
3523   /* set solvertype */
3524   PetscCall(PetscFree(B->solvertype));
3525   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3526   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3527   if (size == 1) {
3528     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3529     B->canuseordering = PETSC_TRUE;
3530   }
3531   B->ops->destroy = MatDestroy_MUMPS;
3532   B->data         = (void *)mumps;
3533 
3534   *F               = B;
3535   mumps->id.job    = JOB_NULL;
3536   mumps->ICNTL_pre = NULL;
3537   mumps->CNTL_pre  = NULL;
3538   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3539   PetscFunctionReturn(PETSC_SUCCESS);
3540 }
3541 
3542 /* MatGetFactor for Seq and MPI SELL matrices */
3543 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3544 {
3545   Mat         B;
3546   Mat_MUMPS  *mumps;
3547   PetscBool   isSeqSELL;
3548   PetscMPIInt size;
3549 
3550   PetscFunctionBegin;
3551   /* Create the factorization matrix */
3552   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3553   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3554   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3555   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3556   PetscCall(MatSetUp(B));
3557 
3558   PetscCall(PetscNew(&mumps));
3559 
3560   B->ops->view    = MatView_MUMPS;
3561   B->ops->getinfo = MatGetInfo_MUMPS;
3562 
3563   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3564   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3565   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3566   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3575 
3576   if (ftype == MAT_FACTOR_LU) {
3577     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3578     B->factortype            = MAT_FACTOR_LU;
3579     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3580     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3581     mumps->sym = 0;
3582     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3583   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3584 
3585   /* set solvertype */
3586   PetscCall(PetscFree(B->solvertype));
3587   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3588   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3589   if (size == 1) {
3590     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3591     B->canuseordering = PETSC_TRUE;
3592   }
3593   B->ops->destroy = MatDestroy_MUMPS;
3594   B->data         = (void *)mumps;
3595 
3596   *F               = B;
3597   mumps->id.job    = JOB_NULL;
3598   mumps->ICNTL_pre = NULL;
3599   mumps->CNTL_pre  = NULL;
3600   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3601   PetscFunctionReturn(PETSC_SUCCESS);
3602 }
3603 
3604 /* MatGetFactor for MATNEST matrices */
3605 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3606 {
3607   Mat         B, **mats;
3608   Mat_MUMPS  *mumps;
3609   PetscInt    nr, nc;
3610   PetscMPIInt size;
3611   PetscBool   flg = PETSC_TRUE;
3612 
3613   PetscFunctionBegin;
3614 #if defined(PETSC_USE_COMPLEX)
3615   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3616     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3617     *F = NULL;
3618     PetscFunctionReturn(PETSC_SUCCESS);
3619   }
3620 #endif
3621 
3622   /* Return if some condition is not satisfied */
3623   *F = NULL;
3624   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3625   if (ftype == MAT_FACTOR_CHOLESKY) {
3626     IS       *rows, *cols;
3627     PetscInt *m, *M;
3628 
3629     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3630     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3631     PetscCall(MatNestGetISs(A, rows, cols));
3632     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3633     if (!flg) {
3634       PetscCall(PetscFree2(rows, cols));
3635       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3636       PetscFunctionReturn(PETSC_SUCCESS);
3637     }
3638     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3639     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
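    /* Cholesky also requires the row index sets of distinct blocks to be non-overlapping: flag any pair whose [min, max] ranges intersect */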
3640     for (PetscInt r = 0; flg && r < nr; r++)
3641       for (PetscInt k = r + 1; flg && k < nr; k++)
3642         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3643     PetscCall(PetscFree2(m, M));
3644     PetscCall(PetscFree2(rows, cols));
3645     if (!flg) {
3646       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3647       PetscFunctionReturn(PETSC_SUCCESS);
3648     }
3649   }
3650 
3651   for (PetscInt r = 0; r < nr; r++) {
3652     for (PetscInt c = 0; c < nc; c++) {
3653       Mat       sub = mats[r][c];
3654       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans;
3655 
3656       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3657       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3658       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3659       else {
3660         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3661         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3662       }
3663       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3664       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3665       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3666       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3667       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3668       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3669       if (ftype == MAT_FACTOR_CHOLESKY) {
3670         if (r == c) {
3671           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ) {
3672             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3673             flg = PETSC_FALSE;
3674           }
3675         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) {
3676           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3677           flg = PETSC_FALSE;
3678         }
3679       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) {
3680         PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3681         flg = PETSC_FALSE;
3682       }
3683     }
3684   }
3685   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3686 
3687   /* Create the factorization matrix */
3688   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3689   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3690   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3691   PetscCall(MatSetUp(B));
3692 
3693   PetscCall(PetscNew(&mumps));
3694 
3695   B->ops->view    = MatView_MUMPS;
3696   B->ops->getinfo = MatGetInfo_MUMPS;
3697 
3698   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3699   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3700   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3701   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3702   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3703   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3704   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3705   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3706   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3707   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3708   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3709   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3710   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3711   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3712 
3713   if (ftype == MAT_FACTOR_LU) {
3714     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3715     B->factortype            = MAT_FACTOR_LU;
3716     mumps->sym               = 0;
3717   } else {
3718     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3719     B->factortype                  = MAT_FACTOR_CHOLESKY;
3720 #if defined(PETSC_USE_COMPLEX)
3721     mumps->sym = 2;
3722 #else
3723     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3724     else mumps->sym = 2;
3725 #endif
3726   }
3727   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3728   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3729 
3730   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3731   if (size == 1) {
3732     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3733     B->canuseordering = PETSC_TRUE;
3734   }
3735 
3736   /* set solvertype */
3737   PetscCall(PetscFree(B->solvertype));
3738   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3739   B->ops->destroy = MatDestroy_MUMPS;
3740   B->data         = (void *)mumps;
3741 
3742   *F               = B;
3743   mumps->id.job    = JOB_NULL;
3744   mumps->ICNTL_pre = NULL;
3745   mumps->CNTL_pre  = NULL;
3746   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3747   PetscFunctionReturn(PETSC_SUCCESS);
3748 }
3749 
3750 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3751 {
3752   PetscFunctionBegin;
3753   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3754   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3755   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3756   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3757   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3758   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3759   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3760   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3761   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3762   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3763   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3764   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3765   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3766   PetscFunctionReturn(PETSC_SUCCESS);
3767 }
3768