xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 3e5b40d05d5df51c8bf9b11f428c5a4540c49d18)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <petscsf.h>
7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
9 #include <../src/mat/impls/sell/mpi/mpisell.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15   #if defined(PETSC_USE_REAL_SINGLE)
16     #include <cmumps_c.h>
17   #else
18     #include <zmumps_c.h>
19   #endif
20 #else
21   #if defined(PETSC_USE_REAL_SINGLE)
22     #include <smumps_c.h>
23   #else
24     #include <dmumps_c.h>
25   #endif
26 #endif
27 EXTERN_C_END
28 #define JOB_INIT         -1
29 #define JOB_NULL         0
30 #define JOB_FACTSYMBOLIC 1
31 #define JOB_FACTNUMERIC  2
32 #define JOB_SOLVE        3
33 #define JOB_END          -2
34 
35 /* calls to MUMPS */
36 #if defined(PETSC_USE_COMPLEX)
37   #if defined(PETSC_USE_REAL_SINGLE)
38     #define MUMPS_c cmumps_c
39   #else
40     #define MUMPS_c zmumps_c
41   #endif
42 #else
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_c smumps_c
45   #else
46     #define MUMPS_c dmumps_c
47   #endif
48 #endif
49 
50 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
51    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
52    naming convention in PetscMPIInt, PetscBLASInt etc.
53 */
54 typedef MUMPS_INT PetscMUMPSInt;
55 
56 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
57   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #else
61   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
62     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
63   #endif
64 #endif
65 
66 #define MPIU_MUMPSINT       MPI_INT
67 #define PETSC_MUMPS_INT_MAX 2147483647
68 #define PETSC_MUMPS_INT_MIN -2147483648
69 
70 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
71 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
72 {
73   PetscFunctionBegin;
74 #if PetscDefined(USE_64BIT_INDICES)
75   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
76 #endif
77   *b = (PetscMUMPSInt)(a);
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* Put these utility routines here since they are only used in this file */
82 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
83 {
84   PetscInt  myval;
85   PetscBool myset;
86   PetscFunctionBegin;
87   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
88   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
89   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
90   if (set) *set = myset;
91   PetscFunctionReturn(PETSC_SUCCESS);
92 }
93 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
94 
95 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
96 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
97   #define PetscMUMPS_c(mumps) \
98     do { \
99       if (mumps->use_petsc_omp_support) { \
100         if (mumps->is_omp_master) { \
101           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
102           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
103           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
104           PetscCall(PetscFPTrapPop()); \
105           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
106         } \
107         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
108         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
109          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
110          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
111          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
112       */ \
113         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
116         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
117       } else { \
118         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
119         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
120         PetscCall(PetscFPTrapPop()); \
121       } \
122     } while (0)
123 #else
124   #define PetscMUMPS_c(mumps) \
125     do { \
126       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
127       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
128       PetscCall(PetscFPTrapPop()); \
129     } while (0)
130 #endif
131 
132 /* declare MumpsScalar */
133 #if defined(PETSC_USE_COMPLEX)
134   #if defined(PETSC_USE_REAL_SINGLE)
135     #define MumpsScalar mumps_complex
136   #else
137     #define MumpsScalar mumps_double_complex
138   #endif
139 #else
140   #define MumpsScalar PetscScalar
141 #endif
142 
143 /* macros s.t. indices match MUMPS documentation */
144 #define ICNTL(I)  icntl[(I)-1]
145 #define CNTL(I)   cntl[(I)-1]
146 #define INFOG(I)  infog[(I)-1]
147 #define INFO(I)   info[(I)-1]
148 #define RINFOG(I) rinfog[(I)-1]
149 #define RINFO(I)  rinfo[(I)-1]
150 
151 typedef struct Mat_MUMPS Mat_MUMPS;
152 struct Mat_MUMPS {
153 #if defined(PETSC_USE_COMPLEX)
154   #if defined(PETSC_USE_REAL_SINGLE)
155   CMUMPS_STRUC_C id;
156   #else
157   ZMUMPS_STRUC_C id;
158   #endif
159 #else
160   #if defined(PETSC_USE_REAL_SINGLE)
161   SMUMPS_STRUC_C id;
162   #else
163   DMUMPS_STRUC_C id;
164   #endif
165 #endif
166 
167   MatStructure   matstruc;
168   PetscMPIInt    myid, petsc_size;
169   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
170   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
171   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
172   PetscMUMPSInt  sym;
173   MPI_Comm       mumps_comm;
174   PetscMUMPSInt *ICNTL_pre;
175   PetscReal     *CNTL_pre;
176   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
177   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
178   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
179   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
180 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
181   PetscInt    *rhs_nrow, max_nrhs;
182   PetscMPIInt *rhs_recvcounts, *rhs_disps;
183   PetscScalar *rhs_loc, *rhs_recvbuf;
184 #endif
185   Vec            b_seq, x_seq;
186   PetscInt       ninfo, *info; /* which INFO to display */
187   PetscInt       sizeredrhs;
188   PetscScalar   *schur_sol;
189   PetscInt       schur_sizesol;
190   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
191   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
192   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
193 
194   /* Support for MATNEST */
195   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
196   PetscInt64  *nest_vals_start;
197   PetscScalar *nest_vals;
198 
199   /* stuff used by petsc/mumps OpenMP support*/
200   PetscBool    use_petsc_omp_support;
201   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
202   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
203   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
204   PetscMPIInt  tag, omp_comm_size;
205   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
206   MPI_Request *reqs;
207 };
208 
209 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
210    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
211  */
212 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
213 {
214   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
215 
216   PetscFunctionBegin;
217 #if defined(PETSC_USE_64BIT_INDICES)
218   {
219     PetscInt i;
220     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
221       PetscCall(PetscFree(mumps->ia_alloc));
222       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
223       mumps->cur_ilen = nrow + 1;
224     }
225     if (nnz > mumps->cur_jlen) {
226       PetscCall(PetscFree(mumps->ja_alloc));
227       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
228       mumps->cur_jlen = nnz;
229     }
230     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
231     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
232     *ia_mumps = mumps->ia_alloc;
233     *ja_mumps = mumps->ja_alloc;
234   }
235 #else
236   *ia_mumps = ia;
237   *ja_mumps = ja;
238 #endif
239   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
240   PetscFunctionReturn(PETSC_SUCCESS);
241 }
242 
243 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
244 {
245   PetscFunctionBegin;
246   PetscCall(PetscFree(mumps->id.listvar_schur));
247   PetscCall(PetscFree(mumps->id.redrhs));
248   PetscCall(PetscFree(mumps->schur_sol));
249   mumps->id.size_schur = 0;
250   mumps->id.schur_lld  = 0;
251   mumps->id.ICNTL(19)  = 0;
252   PetscFunctionReturn(PETSC_SUCCESS);
253 }
254 
255 /* solve with rhs in mumps->id.redrhs and return in the same location */
256 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
257 {
258   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
259   Mat                  S, B, X;
260   MatFactorSchurStatus schurstatus;
261   PetscInt             sizesol;
262 
263   PetscFunctionBegin;
264   PetscCall(MatFactorFactorizeSchurComplement(F));
265   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
266   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
267   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
268 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
269   PetscCall(MatBindToCPU(B, S->boundtocpu));
270 #endif
271   switch (schurstatus) {
272   case MAT_FACTOR_SCHUR_FACTORED:
273     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
274     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
275 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
276     PetscCall(MatBindToCPU(X, S->boundtocpu));
277 #endif
278     if (!mumps->id.ICNTL(9)) { /* transpose solve */
279       PetscCall(MatMatSolveTranspose(S, B, X));
280     } else {
281       PetscCall(MatMatSolve(S, B, X));
282     }
283     break;
284   case MAT_FACTOR_SCHUR_INVERTED:
285     sizesol = mumps->id.nrhs * mumps->id.size_schur;
286     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
287       PetscCall(PetscFree(mumps->schur_sol));
288       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
289       mumps->schur_sizesol = sizesol;
290     }
291     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
292     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
293 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
294     PetscCall(MatBindToCPU(X, S->boundtocpu));
295 #endif
296     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
297     if (!mumps->id.ICNTL(9)) { /* transpose solve */
298       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
299     } else {
300       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
301     }
302     PetscCall(MatProductSetFromOptions(X));
303     PetscCall(MatProductSymbolic(X));
304     PetscCall(MatProductNumeric(X));
305 
306     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
307     break;
308   default:
309     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
310   }
311   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
312   PetscCall(MatDestroy(&B));
313   PetscCall(MatDestroy(&X));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
317 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
318 {
319   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
320 
321   PetscFunctionBegin;
322   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
323     PetscFunctionReturn(PETSC_SUCCESS);
324   }
325   if (!expansion) { /* prepare for the condensation step */
326     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
327     /* allocate MUMPS internal array to store reduced right-hand sides */
328     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
329       PetscCall(PetscFree(mumps->id.redrhs));
330       mumps->id.lredrhs = mumps->id.size_schur;
331       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
332       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
333     }
334   } else { /* prepare for the expansion step */
335     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
336     PetscCall(MatMumpsSolveSchur_Private(F));
337     mumps->id.ICNTL(26) = 2; /* expansion phase */
338     PetscMUMPS_c(mumps);
339     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
340     /* restore defaults */
341     mumps->id.ICNTL(26) = -1;
342     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
343     if (mumps->id.nrhs > 1) {
344       PetscCall(PetscFree(mumps->id.redrhs));
345       mumps->id.lredrhs = 0;
346       mumps->sizeredrhs = 0;
347     }
348   }
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
351 
352 /*
353   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
354 
355   input:
356     A       - matrix in aij,baij or sbaij format
357     shift   - 0: C style output triple; 1: Fortran style output triple.
358     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
359               MAT_REUSE_MATRIX:   only the values in v array are updated
360   output:
361     nnz     - dim of r, c, and v (number of local nonzero entries of A)
362     r, c, v - row and col index, matrix values (matrix triples)
363 
364   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
365   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
366   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
367 
368  */
369 
370 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
371 {
372   const PetscScalar *av;
373   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
374   PetscInt64         nz, rnz, i, j, k;
375   PetscMUMPSInt     *row, *col;
376   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
377 
378   PetscFunctionBegin;
379   PetscCall(MatSeqAIJGetArrayRead(A, &av));
380   mumps->val = (PetscScalar *)av;
381   if (reuse == MAT_INITIAL_MATRIX) {
382     nz = aa->nz;
383     ai = aa->i;
384     aj = aa->j;
385     PetscCall(PetscMalloc2(nz, &row, nz, &col));
386     for (i = k = 0; i < M; i++) {
387       rnz = ai[i + 1] - ai[i];
388       ajj = aj + ai[i];
389       for (j = 0; j < rnz; j++) {
390         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
391         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
392         k++;
393       }
394     }
395     mumps->irn = row;
396     mumps->jcn = col;
397     mumps->nnz = nz;
398   }
399   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
404 {
405   PetscInt64     nz, i, j, k, r;
406   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
407   PetscMUMPSInt *row, *col;
408 
409   PetscFunctionBegin;
410   mumps->val = a->val;
411   if (reuse == MAT_INITIAL_MATRIX) {
412     nz = a->sliidx[a->totalslices];
413     PetscCall(PetscMalloc2(nz, &row, nz, &col));
414     for (i = k = 0; i < a->totalslices; i++) {
415       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
416     }
417     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
418     mumps->irn = row;
419     mumps->jcn = col;
420     mumps->nnz = nz;
421   }
422   PetscFunctionReturn(PETSC_SUCCESS);
423 }
424 
425 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
426 {
427   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
428   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
429   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
430   PetscInt        bs;
431   PetscMUMPSInt  *row, *col;
432 
433   PetscFunctionBegin;
434   PetscCall(MatGetBlockSize(A, &bs));
435   M          = A->rmap->N / bs;
436   mumps->val = aa->a;
437   if (reuse == MAT_INITIAL_MATRIX) {
438     ai = aa->i;
439     aj = aa->j;
440     nz = bs2 * aa->nz;
441     PetscCall(PetscMalloc2(nz, &row, nz, &col));
442     for (i = 0; i < M; i++) {
443       ajj = aj + ai[i];
444       rnz = ai[i + 1] - ai[i];
445       for (k = 0; k < rnz; k++) {
446         for (j = 0; j < bs; j++) {
447           for (m = 0; m < bs; m++) {
448             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
449             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
450             idx++;
451           }
452         }
453       }
454     }
455     mumps->irn = row;
456     mumps->jcn = col;
457     mumps->nnz = nz;
458   }
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
463 {
464   const PetscInt *ai, *aj, *ajj;
465   PetscInt        bs;
466   PetscInt64      nz, rnz, i, j, k, m;
467   PetscMUMPSInt  *row, *col;
468   PetscScalar    *val;
469   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
470   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
471 #if defined(PETSC_USE_COMPLEX)
472   PetscBool isset, hermitian;
473 #endif
474 
475   PetscFunctionBegin;
476 #if defined(PETSC_USE_COMPLEX)
477   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
478   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
479 #endif
480   ai = aa->i;
481   aj = aa->j;
482   PetscCall(MatGetBlockSize(A, &bs));
483   if (reuse == MAT_INITIAL_MATRIX) {
484     const PetscInt64 alloc_size = aa->nz * bs2;
485 
486     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
487     if (bs > 1) {
488       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
489       mumps->val = mumps->val_alloc;
490     } else {
491       mumps->val = aa->a;
492     }
493     mumps->irn = row;
494     mumps->jcn = col;
495   } else {
496     if (bs == 1) mumps->val = aa->a;
497     row = mumps->irn;
498     col = mumps->jcn;
499   }
500   val = mumps->val;
501 
502   nz = 0;
503   if (bs > 1) {
504     for (i = 0; i < mbs; i++) {
505       rnz = ai[i + 1] - ai[i];
506       ajj = aj + ai[i];
507       for (j = 0; j < rnz; j++) {
508         for (k = 0; k < bs; k++) {
509           for (m = 0; m < bs; m++) {
510             if (ajj[j] > i || k >= m) {
511               if (reuse == MAT_INITIAL_MATRIX) {
512                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
513                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
514               }
515               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
516             }
517           }
518         }
519       }
520     }
521   } else if (reuse == MAT_INITIAL_MATRIX) {
522     for (i = 0; i < mbs; i++) {
523       rnz = ai[i + 1] - ai[i];
524       ajj = aj + ai[i];
525       for (j = 0; j < rnz; j++) {
526         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
527         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
528         nz++;
529       }
530     }
531     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
532   }
533   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
538 {
539   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
540   PetscInt64         nz, rnz, i, j;
541   const PetscScalar *av, *v1;
542   PetscScalar       *val;
543   PetscMUMPSInt     *row, *col;
544   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
545   PetscBool          missing;
546 #if defined(PETSC_USE_COMPLEX)
547   PetscBool hermitian, isset;
548 #endif
549 
550   PetscFunctionBegin;
551 #if defined(PETSC_USE_COMPLEX)
552   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
553   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
554 #endif
555   PetscCall(MatSeqAIJGetArrayRead(A, &av));
556   ai    = aa->i;
557   aj    = aa->j;
558   adiag = aa->diag;
559   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
560   if (reuse == MAT_INITIAL_MATRIX) {
561     /* count nz in the upper triangular part of A */
562     nz = 0;
563     if (missing) {
564       for (i = 0; i < M; i++) {
565         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
566           for (j = ai[i]; j < ai[i + 1]; j++) {
567             if (aj[j] < i) continue;
568             nz++;
569           }
570         } else {
571           nz += ai[i + 1] - adiag[i];
572         }
573       }
574     } else {
575       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
576     }
577     PetscCall(PetscMalloc2(nz, &row, nz, &col));
578     PetscCall(PetscMalloc1(nz, &val));
579     mumps->nnz = nz;
580     mumps->irn = row;
581     mumps->jcn = col;
582     mumps->val = mumps->val_alloc = val;
583 
584     nz = 0;
585     if (missing) {
586       for (i = 0; i < M; i++) {
587         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
588           for (j = ai[i]; j < ai[i + 1]; j++) {
589             if (aj[j] < i) continue;
590             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
591             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
592             val[nz] = av[j];
593             nz++;
594           }
595         } else {
596           rnz = ai[i + 1] - adiag[i];
597           ajj = aj + adiag[i];
598           v1  = av + adiag[i];
599           for (j = 0; j < rnz; j++) {
600             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
601             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
602             val[nz++] = v1[j];
603           }
604         }
605       }
606     } else {
607       for (i = 0; i < M; i++) {
608         rnz = ai[i + 1] - adiag[i];
609         ajj = aj + adiag[i];
610         v1  = av + adiag[i];
611         for (j = 0; j < rnz; j++) {
612           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
613           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
614           val[nz++] = v1[j];
615         }
616       }
617     }
618   } else {
619     nz  = 0;
620     val = mumps->val;
621     if (missing) {
622       for (i = 0; i < M; i++) {
623         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
624           for (j = ai[i]; j < ai[i + 1]; j++) {
625             if (aj[j] < i) continue;
626             val[nz++] = av[j];
627           }
628         } else {
629           rnz = ai[i + 1] - adiag[i];
630           v1  = av + adiag[i];
631           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
632         }
633       }
634     } else {
635       for (i = 0; i < M; i++) {
636         rnz = ai[i + 1] - adiag[i];
637         v1  = av + adiag[i];
638         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
639       }
640     }
641   }
642   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
646 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
647 {
648   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
649   PetscInt           bs;
650   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
651   PetscMUMPSInt     *row, *col;
652   const PetscScalar *av, *bv, *v1, *v2;
653   PetscScalar       *val;
654   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
655   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
656   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
657   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
658 #if defined(PETSC_USE_COMPLEX)
659   PetscBool hermitian, isset;
660 #endif
661 
662   PetscFunctionBegin;
663 #if defined(PETSC_USE_COMPLEX)
664   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
665   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
666 #endif
667   PetscCall(MatGetBlockSize(A, &bs));
668   rstart = A->rmap->rstart;
669   ai     = aa->i;
670   aj     = aa->j;
671   bi     = bb->i;
672   bj     = bb->j;
673   av     = aa->a;
674   bv     = bb->a;
675 
676   garray = mat->garray;
677 
678   if (reuse == MAT_INITIAL_MATRIX) {
679     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
680     PetscCall(PetscMalloc2(nz, &row, nz, &col));
681     PetscCall(PetscMalloc1(nz, &val));
682     /* can not decide the exact mumps->nnz now because of the SBAIJ */
683     mumps->irn = row;
684     mumps->jcn = col;
685     mumps->val = mumps->val_alloc = val;
686   } else {
687     val = mumps->val;
688   }
689 
690   jj   = 0;
691   irow = rstart;
692   for (i = 0; i < mbs; i++) {
693     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
694     countA = ai[i + 1] - ai[i];
695     countB = bi[i + 1] - bi[i];
696     bjj    = bj + bi[i];
697     v1     = av + ai[i] * bs2;
698     v2     = bv + bi[i] * bs2;
699 
700     if (bs > 1) {
701       /* A-part */
702       for (j = 0; j < countA; j++) {
703         for (k = 0; k < bs; k++) {
704           for (m = 0; m < bs; m++) {
705             if (rstart + ajj[j] * bs > irow || k >= m) {
706               if (reuse == MAT_INITIAL_MATRIX) {
707                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
708                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
709               }
710               val[jj++] = v1[j * bs2 + m + k * bs];
711             }
712           }
713         }
714       }
715 
716       /* B-part */
717       for (j = 0; j < countB; j++) {
718         for (k = 0; k < bs; k++) {
719           for (m = 0; m < bs; m++) {
720             if (reuse == MAT_INITIAL_MATRIX) {
721               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
722               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
723             }
724             val[jj++] = v2[j * bs2 + m + k * bs];
725           }
726         }
727       }
728     } else {
729       /* A-part */
730       for (j = 0; j < countA; j++) {
731         if (reuse == MAT_INITIAL_MATRIX) {
732           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
733           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
734         }
735         val[jj++] = v1[j];
736       }
737 
738       /* B-part */
739       for (j = 0; j < countB; j++) {
740         if (reuse == MAT_INITIAL_MATRIX) {
741           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
742           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
743         }
744         val[jj++] = v2[j];
745       }
746     }
747     irow += bs;
748   }
749   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
750   PetscFunctionReturn(PETSC_SUCCESS);
751 }
752 
753 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
754 {
755   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
756   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
757   PetscMUMPSInt     *row, *col;
758   const PetscScalar *av, *bv, *v1, *v2;
759   PetscScalar       *val;
760   Mat                Ad, Ao;
761   Mat_SeqAIJ        *aa;
762   Mat_SeqAIJ        *bb;
763 
764   PetscFunctionBegin;
765   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
766   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
767   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
768 
769   aa = (Mat_SeqAIJ *)(Ad)->data;
770   bb = (Mat_SeqAIJ *)(Ao)->data;
771   ai = aa->i;
772   aj = aa->j;
773   bi = bb->i;
774   bj = bb->j;
775 
776   rstart = A->rmap->rstart;
777   cstart = A->cmap->rstart;
778 
779   if (reuse == MAT_INITIAL_MATRIX) {
780     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
781     PetscCall(PetscMalloc2(nz, &row, nz, &col));
782     PetscCall(PetscMalloc1(nz, &val));
783     mumps->nnz = nz;
784     mumps->irn = row;
785     mumps->jcn = col;
786     mumps->val = mumps->val_alloc = val;
787   } else {
788     val = mumps->val;
789   }
790 
791   jj   = 0;
792   irow = rstart;
793   for (i = 0; i < m; i++) {
794     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
795     countA = ai[i + 1] - ai[i];
796     countB = bi[i + 1] - bi[i];
797     bjj    = bj + bi[i];
798     v1     = av + ai[i];
799     v2     = bv + bi[i];
800 
801     /* A-part */
802     for (j = 0; j < countA; j++) {
803       if (reuse == MAT_INITIAL_MATRIX) {
804         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
805         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
806       }
807       val[jj++] = v1[j];
808     }
809 
810     /* B-part */
811     for (j = 0; j < countB; j++) {
812       if (reuse == MAT_INITIAL_MATRIX) {
813         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
814         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
815       }
816       val[jj++] = v2[j];
817     }
818     irow++;
819   }
820   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
821   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
822   PetscFunctionReturn(PETSC_SUCCESS);
823 }
824 
825 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
826 {
827   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
828   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
829   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
830   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
831   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
832   const PetscInt     bs2 = mat->bs2;
833   PetscInt           bs;
834   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
835   PetscMUMPSInt     *row, *col;
836   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
837   PetscScalar       *val;
838 
839   PetscFunctionBegin;
840   PetscCall(MatGetBlockSize(A, &bs));
841   if (reuse == MAT_INITIAL_MATRIX) {
842     nz = bs2 * (aa->nz + bb->nz);
843     PetscCall(PetscMalloc2(nz, &row, nz, &col));
844     PetscCall(PetscMalloc1(nz, &val));
845     mumps->nnz = nz;
846     mumps->irn = row;
847     mumps->jcn = col;
848     mumps->val = mumps->val_alloc = val;
849   } else {
850     val = mumps->val;
851   }
852 
853   jj   = 0;
854   irow = rstart;
855   for (i = 0; i < mbs; i++) {
856     countA = ai[i + 1] - ai[i];
857     countB = bi[i + 1] - bi[i];
858     ajj    = aj + ai[i];
859     bjj    = bj + bi[i];
860     v1     = av + bs2 * ai[i];
861     v2     = bv + bs2 * bi[i];
862 
863     idx = 0;
864     /* A-part */
865     for (k = 0; k < countA; k++) {
866       for (j = 0; j < bs; j++) {
867         for (n = 0; n < bs; n++) {
868           if (reuse == MAT_INITIAL_MATRIX) {
869             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
870             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
871           }
872           val[jj++] = v1[idx++];
873         }
874       }
875     }
876 
877     idx = 0;
878     /* B-part */
879     for (k = 0; k < countB; k++) {
880       for (j = 0; j < bs; j++) {
881         for (n = 0; n < bs; n++) {
882           if (reuse == MAT_INITIAL_MATRIX) {
883             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
884             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
885           }
886           val[jj++] = v2[idx++];
887         }
888       }
889     }
890     irow += bs;
891   }
892   PetscFunctionReturn(PETSC_SUCCESS);
893 }
894 
895 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
896 {
897   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
898   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
899   PetscMUMPSInt     *row, *col;
900   const PetscScalar *av, *bv, *v1, *v2;
901   PetscScalar       *val;
902   Mat                Ad, Ao;
903   Mat_SeqAIJ        *aa;
904   Mat_SeqAIJ        *bb;
905 #if defined(PETSC_USE_COMPLEX)
906   PetscBool hermitian, isset;
907 #endif
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_COMPLEX)
911   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
912   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
913 #endif
914   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
915   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
916   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
917 
918   aa    = (Mat_SeqAIJ *)(Ad)->data;
919   bb    = (Mat_SeqAIJ *)(Ao)->data;
920   ai    = aa->i;
921   aj    = aa->j;
922   adiag = aa->diag;
923   bi    = bb->i;
924   bj    = bb->j;
925 
926   rstart = A->rmap->rstart;
927 
928   if (reuse == MAT_INITIAL_MATRIX) {
929     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
930     nzb = 0; /* num of upper triangular entries in mat->B */
931     for (i = 0; i < m; i++) {
932       nza += (ai[i + 1] - adiag[i]);
933       countB = bi[i + 1] - bi[i];
934       bjj    = bj + bi[i];
935       for (j = 0; j < countB; j++) {
936         if (garray[bjj[j]] > rstart) nzb++;
937       }
938     }
939 
940     nz = nza + nzb; /* total nz of upper triangular part of mat */
941     PetscCall(PetscMalloc2(nz, &row, nz, &col));
942     PetscCall(PetscMalloc1(nz, &val));
943     mumps->nnz = nz;
944     mumps->irn = row;
945     mumps->jcn = col;
946     mumps->val = mumps->val_alloc = val;
947   } else {
948     val = mumps->val;
949   }
950 
951   jj   = 0;
952   irow = rstart;
953   for (i = 0; i < m; i++) {
954     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
955     v1     = av + adiag[i];
956     countA = ai[i + 1] - adiag[i];
957     countB = bi[i + 1] - bi[i];
958     bjj    = bj + bi[i];
959     v2     = bv + bi[i];
960 
961     /* A-part */
962     for (j = 0; j < countA; j++) {
963       if (reuse == MAT_INITIAL_MATRIX) {
964         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
965         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
966       }
967       val[jj++] = v1[j];
968     }
969 
970     /* B-part */
971     for (j = 0; j < countB; j++) {
972       if (garray[bjj[j]] > rstart) {
973         if (reuse == MAT_INITIAL_MATRIX) {
974           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
975           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
976         }
977         val[jj++] = v2[j];
978       }
979     }
980     irow++;
981   }
982   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
983   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
984   PetscFunctionReturn(PETSC_SUCCESS);
985 }
986 
987 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
988 {
989   Mat     **mats;
990   PetscInt  nr, nc;
991   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
992 
993   PetscFunctionBegin;
994   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
995   if (reuse == MAT_INITIAL_MATRIX) {
996     PetscMUMPSInt *irns, *jcns;
997     PetscScalar   *vals;
998     PetscInt64     totnnz, cumnnz, maxnnz;
999     PetscInt      *pjcns_w;
1000     IS            *rows, *cols;
1001     PetscInt     **rows_idx, **cols_idx;
1002 
1003     cumnnz = 0;
1004     maxnnz = 0;
1005     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1006     for (PetscInt r = 0; r < nr; r++) {
1007       for (PetscInt c = 0; c < nc; c++) {
1008         Mat sub = mats[r][c];
1009 
1010         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1011         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1012         if (sub) {
1013           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1014           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE;
1015           MatInfo   info;
1016 
1017           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1018           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1019           else {
1020             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1021             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1022           }
1023           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1024           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1025           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1026           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1027           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1028           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1029 
1030           if (chol) {
1031             if (r == c) {
1032               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1033               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1034               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1035               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1036             } else {
1037               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1038               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1039               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1040               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1041             }
1042           } else {
1043             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1044             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1045             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1046             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1047           }
1048           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1049           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1050           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1051           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1052           maxnnz = PetscMax(maxnnz, info.nz_used);
1053         }
1054       }
1055     }
1056 
1057     /* Allocate total COO */
1058     totnnz = cumnnz;
1059     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1060     PetscCall(PetscMalloc1(totnnz, &vals));
1061 
1062     /* Handle rows and column maps
1063        We directly map rows and use an SF for the columns */
1064     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1065     PetscCall(MatNestGetISs(A, rows, cols));
1066     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1067     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1068     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1069     else (void)maxnnz;
1070 
1071     cumnnz = 0;
1072     for (PetscInt r = 0; r < nr; r++) {
1073       for (PetscInt c = 0; c < nc; c++) {
1074         Mat             sub  = mats[r][c];
1075         const PetscInt *ridx = rows_idx[r];
1076         const PetscInt *cidx = cols_idx[c];
1077         PetscInt        rst;
1078         PetscSF         csf;
1079         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1080         PetscLayout     cmap;
1081 
1082         mumps->nest_vals_start[r * nc + c] = cumnnz;
1083         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1084 
1085         /* Extract inner blocks if needed */
1086         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1087         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1088         else {
1089           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1090           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1091         }
1092         swap = (PetscBool)(isTrans || isHTrans);
1093 
1094         /* Get column layout to map off-process columns */
1095         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1096 
1097         /* Get row start to map on-process rows */
1098         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1099 
1100         /* Directly use the mumps datastructure and use C ordering for now */
1101         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1102 
1103         /* Swap the role of rows and columns indices for transposed blocks
1104            since we need values with global final ordering */
1105         if (swap) {
1106           cidx = rows_idx[r];
1107           ridx = cols_idx[c];
1108         }
1109 
1110         /* Communicate column indices
1111            This could have been done with a single SF but it would have complicated the code a lot.
1112            But since we do it only once, we pay the price of setting up an SF for each block */
1113         if (PetscDefined(USE_64BIT_INDICES)) {
1114           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1115         } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
1116         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1117         PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1118         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1119         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1120         PetscCall(PetscSFDestroy(&csf));
1121 
1122         /* Import indices: use direct map for rows and mapped indices for columns */
1123         if (swap) {
1124           for (PetscInt k = 0; k < mumps->nnz; k++) {
1125             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1126             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1127           }
1128         } else {
1129           for (PetscInt k = 0; k < mumps->nnz; k++) {
1130             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1131             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1132           }
1133         }
1134 
1135         /* Import values to full COO */
1136         if (isHTrans) { /* conjugate the entries */
1137           for (PetscInt k = 0; k < mumps->nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1138         }
1139         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1140 
1141         /* Shift new starting point and sanity check */
1142         cumnnz += mumps->nnz;
1143         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1144 
1145         /* Free scratch memory */
1146         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1147         PetscCall(PetscFree(mumps->val_alloc));
1148         mumps->val = NULL;
1149         mumps->nnz = 0;
1150       }
1151     }
1152     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1153     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1154     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1155     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1156     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1157     mumps->nest_vals_start[nr * nc] = cumnnz;
1158 
1159     /* Set pointers for final MUMPS data structure */
1160     mumps->nest_vals = vals;
1161     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1162     mumps->val       = vals;
1163     mumps->irn       = irns;
1164     mumps->jcn       = jcns;
1165     mumps->nnz       = cumnnz;
1166   } else {
1167     PetscScalar *oval = mumps->nest_vals;
1168     for (PetscInt r = 0; r < nr; r++) {
1169       for (PetscInt c = 0; c < nc; c++) {
1170         PetscBool isTrans, isHTrans = PETSC_FALSE;
1171         Mat       sub  = mats[r][c];
1172         PetscInt  midx = r * nc + c;
1173 
1174         if (!mumps->nest_convert_to_triples[midx]) continue;
1175         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1176         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1177         else {
1178           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1179           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1180         }
1181         mumps->val = oval + mumps->nest_vals_start[midx];
1182         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1183         if (isHTrans) {
1184           PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1185           for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1186         }
1187       }
1188     }
1189     mumps->val = oval;
1190   }
1191   PetscFunctionReturn(PETSC_SUCCESS);
1192 }
1193 
1194 PetscErrorCode MatDestroy_MUMPS(Mat A)
1195 {
1196   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1197 
1198   PetscFunctionBegin;
1199   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1200   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1201   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1202   PetscCall(VecDestroy(&mumps->b_seq));
1203   PetscCall(VecDestroy(&mumps->x_seq));
1204   PetscCall(PetscFree(mumps->id.perm_in));
1205   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1206   PetscCall(PetscFree(mumps->val_alloc));
1207   PetscCall(PetscFree(mumps->info));
1208   PetscCall(PetscFree(mumps->ICNTL_pre));
1209   PetscCall(PetscFree(mumps->CNTL_pre));
1210   PetscCall(MatMumpsResetSchur_Private(mumps));
1211   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1212     mumps->id.job = JOB_END;
1213     PetscMUMPS_c(mumps);
1214     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1215     if (mumps->mumps_comm != MPI_COMM_NULL) {
1216       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1217       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1218     }
1219   }
1220 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1221   if (mumps->use_petsc_omp_support) {
1222     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1223     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1224     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1225   }
1226 #endif
1227   PetscCall(PetscFree(mumps->ia_alloc));
1228   PetscCall(PetscFree(mumps->ja_alloc));
1229   PetscCall(PetscFree(mumps->recvcount));
1230   PetscCall(PetscFree(mumps->reqs));
1231   PetscCall(PetscFree(mumps->irhs_loc));
1232   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1233   PetscCall(PetscFree(mumps->nest_vals));
1234   PetscCall(PetscFree(A->data));
1235 
1236   /* clear composed functions */
1237   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1238   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1239   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1240   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1241   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1242   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1243   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1244   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1245   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1246   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1247   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1248   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1249   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1250   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1251   PetscFunctionReturn(PETSC_SUCCESS);
1252 }
1253 
1254 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1255 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1256 {
1257   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1258   const PetscMPIInt ompsize = mumps->omp_comm_size;
1259   PetscInt          i, m, M, rstart;
1260 
1261   PetscFunctionBegin;
1262   PetscCall(MatGetSize(A, &M, NULL));
1263   PetscCall(MatGetLocalSize(A, &m, NULL));
1264   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1265   if (ompsize == 1) {
1266     if (!mumps->irhs_loc) {
1267       mumps->nloc_rhs = m;
1268       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1269       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1270       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1271     }
1272     mumps->id.rhs_loc = (MumpsScalar *)array;
1273   } else {
1274 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1275     const PetscInt *ranges;
1276     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1277     MPI_Group       petsc_group, omp_group;
1278     PetscScalar    *recvbuf = NULL;
1279 
1280     if (mumps->is_omp_master) {
1281       /* Lazily initialize the omp stuff for distributed rhs */
1282       if (!mumps->irhs_loc) {
1283         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1284         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1285         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1286         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1287         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1288         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1289 
1290         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1291         mumps->nloc_rhs = 0;
1292         PetscCall(MatGetOwnershipRanges(A, &ranges));
1293         for (j = 0; j < ompsize; j++) {
1294           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1295           mumps->nloc_rhs += mumps->rhs_nrow[j];
1296         }
1297         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1298         for (j = k = 0; j < ompsize; j++) {
1299           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1300         }
1301 
1302         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1303         PetscCallMPI(MPI_Group_free(&petsc_group));
1304         PetscCallMPI(MPI_Group_free(&omp_group));
1305       }
1306 
1307       /* Realloc buffers when current nrhs is bigger than what we have met */
1308       if (nrhs > mumps->max_nrhs) {
1309         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1310         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1311         mumps->max_nrhs = nrhs;
1312       }
1313 
1314       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1315       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1316       mumps->rhs_disps[0] = 0;
1317       for (j = 1; j < ompsize; j++) {
1318         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1319         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1320       }
1321       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1322     }
1323 
1324     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1325     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1326 
1327     if (mumps->is_omp_master) {
1328       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1329         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1330         for (j = 0; j < ompsize; j++) {
1331           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1332           dst                    = dstbase;
1333           for (i = 0; i < nrhs; i++) {
1334             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1335             src += mumps->rhs_nrow[j];
1336             dst += mumps->nloc_rhs;
1337           }
1338           dstbase += mumps->rhs_nrow[j];
1339         }
1340       }
1341       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1342     }
1343 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1344   }
1345   mumps->id.nrhs     = nrhs;
1346   mumps->id.nloc_rhs = mumps->nloc_rhs;
1347   mumps->id.lrhs_loc = mumps->nloc_rhs;
1348   mumps->id.irhs_loc = mumps->irhs_loc;
1349   PetscFunctionReturn(PETSC_SUCCESS);
1350 }
1351 
1352 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1353 {
1354   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1355   const PetscScalar *rarray = NULL;
1356   PetscScalar       *array;
1357   IS                 is_iden, is_petsc;
1358   PetscInt           i;
1359   PetscBool          second_solve = PETSC_FALSE;
1360   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1361 
1362   PetscFunctionBegin;
1363   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1364                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1365                                    &cite1));
1366   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1367                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1368                                    &cite2));
1369 
1370   if (A->factorerrortype) {
1371     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1372     PetscCall(VecSetInf(x));
1373     PetscFunctionReturn(PETSC_SUCCESS);
1374   }
1375 
1376   mumps->id.nrhs = 1;
1377   if (mumps->petsc_size > 1) {
1378     if (mumps->ICNTL20 == 10) {
1379       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1380       PetscCall(VecGetArrayRead(b, &rarray));
1381       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1382     } else {
1383       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1384       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1385       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1386       if (!mumps->myid) {
1387         PetscCall(VecGetArray(mumps->b_seq, &array));
1388         mumps->id.rhs = (MumpsScalar *)array;
1389       }
1390     }
1391   } else {                   /* petsc_size == 1 */
1392     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1393     PetscCall(VecCopy(b, x));
1394     PetscCall(VecGetArray(x, &array));
1395     mumps->id.rhs = (MumpsScalar *)array;
1396   }
1397 
1398   /*
1399      handle condensation step of Schur complement (if any)
1400      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1401      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1402      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1403      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1404   */
1405   if (mumps->id.size_schur > 0) {
1406     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1407     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1408       second_solve = PETSC_TRUE;
1409       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1410       mumps->id.ICNTL(26) = 1; /* condensation phase */
1411     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1412   }
1413   /* solve phase */
1414   mumps->id.job = JOB_SOLVE;
1415   PetscMUMPS_c(mumps);
1416   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1417 
1418   /* handle expansion step of Schur complement (if any) */
1419   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1420   else if (mumps->id.ICNTL(26) == 1) {
1421     PetscCall(MatMumpsSolveSchur_Private(A));
1422     for (i = 0; i < mumps->id.size_schur; ++i) {
1423 #if !defined(PETSC_USE_COMPLEX)
1424       PetscScalar val = mumps->id.redrhs[i];
1425 #else
1426       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1427 #endif
1428       array[mumps->id.listvar_schur[i] - 1] = val;
1429     }
1430   }
1431 
1432   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1433     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1434       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1435       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1436     }
1437     if (!mumps->scat_sol) { /* create scatter scat_sol */
1438       PetscInt *isol2_loc = NULL;
1439       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1440       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1441       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1442       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1443       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1444       PetscCall(ISDestroy(&is_iden));
1445       PetscCall(ISDestroy(&is_petsc));
1446       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1447     }
1448 
1449     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1450     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1451   }
1452 
1453   if (mumps->petsc_size > 1) {
1454     if (mumps->ICNTL20 == 10) {
1455       PetscCall(VecRestoreArrayRead(b, &rarray));
1456     } else if (!mumps->myid) {
1457       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1458     }
1459   } else PetscCall(VecRestoreArray(x, &array));
1460 
1461   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1466 {
1467   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1468   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1469 
1470   PetscFunctionBegin;
1471   mumps->id.ICNTL(9) = 0;
1472   PetscCall(MatSolve_MUMPS(A, b, x));
1473   mumps->id.ICNTL(9) = value;
1474   PetscFunctionReturn(PETSC_SUCCESS);
1475 }
1476 
1477 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1478 {
1479   Mat                Bt = NULL;
1480   PetscBool          denseX, denseB, flg, flgT;
1481   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1482   PetscInt           i, nrhs, M;
1483   PetscScalar       *array;
1484   const PetscScalar *rbray;
1485   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1486   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1487   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1488   IS                 is_to, is_from;
1489   PetscInt           k, proc, j, m, myrstart;
1490   const PetscInt    *rstart;
1491   Vec                v_mpi, msol_loc;
1492   VecScatter         scat_sol;
1493   Vec                b_seq;
1494   VecScatter         scat_rhs;
1495   PetscScalar       *aa;
1496   PetscInt           spnr, *ia, *ja;
1497   Mat_MPIAIJ        *b = NULL;
1498 
1499   PetscFunctionBegin;
1500   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1501   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1502 
1503   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1504   if (denseB) {
1505     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1506     mumps->id.ICNTL(20) = 0; /* dense RHS */
1507   } else {                   /* sparse B */
1508     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1509     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1510     if (flgT) { /* input B is transpose of actual RHS matrix,
1511                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1512       PetscCall(MatTransposeGetMat(B, &Bt));
1513     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1514     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1515   }
1516 
1517   PetscCall(MatGetSize(B, &M, &nrhs));
1518   mumps->id.nrhs = nrhs;
1519   mumps->id.lrhs = M;
1520   mumps->id.rhs  = NULL;
1521 
1522   if (mumps->petsc_size == 1) {
1523     PetscScalar *aa;
1524     PetscInt     spnr, *ia, *ja;
1525     PetscBool    second_solve = PETSC_FALSE;
1526 
1527     PetscCall(MatDenseGetArray(X, &array));
1528     mumps->id.rhs = (MumpsScalar *)array;
1529 
1530     if (denseB) {
1531       /* copy B to X */
1532       PetscCall(MatDenseGetArrayRead(B, &rbray));
1533       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1534       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1535     } else { /* sparse B */
1536       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1537       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1538       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1539       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1540       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1541     }
1542     /* handle condensation step of Schur complement (if any) */
1543     if (mumps->id.size_schur > 0) {
1544       PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1545       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1546         second_solve = PETSC_TRUE;
1547         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1548         mumps->id.ICNTL(26) = 1; /* condensation phase */
1549       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1550     }
1551     /* solve phase */
1552     mumps->id.job = JOB_SOLVE;
1553     PetscMUMPS_c(mumps);
1554     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1555 
1556     /* handle expansion step of Schur complement (if any) */
1557     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1558     else if (mumps->id.ICNTL(26) == 1) {
1559       PetscCall(MatMumpsSolveSchur_Private(A));
1560       for (j = 0; j < nrhs; ++j)
1561         for (i = 0; i < mumps->id.size_schur; ++i) {
1562 #if !defined(PETSC_USE_COMPLEX)
1563           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1564 #else
1565           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1566 #endif
1567           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1568         }
1569     }
1570     if (!denseB) { /* sparse B */
1571       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1572       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1573       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1574     }
1575     PetscCall(MatDenseRestoreArray(X, &array));
1576     PetscFunctionReturn(PETSC_SUCCESS);
1577   }
1578 
1579   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1580   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1581 
1582   /* create msol_loc to hold mumps local solution */
1583   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1584   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1585 
1586   lsol_loc  = mumps->id.lsol_loc;
1587   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1588   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1589   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1590   mumps->id.isol_loc = isol_loc;
1591 
1592   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1593 
1594   if (denseB) {
1595     if (mumps->ICNTL20 == 10) {
1596       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1597       PetscCall(MatDenseGetArrayRead(B, &rbray));
1598       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1599       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1600       PetscCall(MatGetLocalSize(B, &m, NULL));
1601       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1602     } else {
1603       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1604       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1605         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1606         0, re-arrange B into desired order, which is a local operation.
1607       */
1608 
1609       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1610       /* wrap dense rhs matrix B into a vector v_mpi */
1611       PetscCall(MatGetLocalSize(B, &m, NULL));
1612       PetscCall(MatDenseGetArray(B, &bray));
1613       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1614       PetscCall(MatDenseRestoreArray(B, &bray));
1615 
1616       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1617       if (!mumps->myid) {
1618         PetscInt *idx;
1619         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1620         PetscCall(PetscMalloc1(nrhs * M, &idx));
1621         PetscCall(MatGetOwnershipRanges(B, &rstart));
1622         k = 0;
1623         for (proc = 0; proc < mumps->petsc_size; proc++) {
1624           for (j = 0; j < nrhs; j++) {
1625             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1626           }
1627         }
1628 
1629         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1630         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1631         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1632       } else {
1633         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1634         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1635         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1636       }
1637       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1638       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1639       PetscCall(ISDestroy(&is_to));
1640       PetscCall(ISDestroy(&is_from));
1641       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1642 
1643       if (!mumps->myid) { /* define rhs on the host */
1644         PetscCall(VecGetArray(b_seq, &bray));
1645         mumps->id.rhs = (MumpsScalar *)bray;
1646         PetscCall(VecRestoreArray(b_seq, &bray));
1647       }
1648     }
1649   } else { /* sparse B */
1650     b = (Mat_MPIAIJ *)Bt->data;
1651 
1652     /* wrap dense X into a vector v_mpi */
1653     PetscCall(MatGetLocalSize(X, &m, NULL));
1654     PetscCall(MatDenseGetArray(X, &bray));
1655     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1656     PetscCall(MatDenseRestoreArray(X, &bray));
1657 
1658     if (!mumps->myid) {
1659       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1660       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1661       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1662       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1663       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1664     } else {
1665       mumps->id.irhs_ptr    = NULL;
1666       mumps->id.irhs_sparse = NULL;
1667       mumps->id.nz_rhs      = 0;
1668       mumps->id.rhs_sparse  = NULL;
1669     }
1670   }
1671 
1672   /* solve phase */
1673   mumps->id.job = JOB_SOLVE;
1674   PetscMUMPS_c(mumps);
1675   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1676 
1677   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1678   PetscCall(MatDenseGetArray(X, &array));
1679   PetscCall(VecPlaceArray(v_mpi, array));
1680 
1681   /* create scatter scat_sol */
1682   PetscCall(MatGetOwnershipRanges(X, &rstart));
1683   /* iidx: index for scatter mumps solution to petsc X */
1684 
1685   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1686   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1687   for (i = 0; i < lsol_loc; i++) {
1688     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1689 
1690     for (proc = 0; proc < mumps->petsc_size; proc++) {
1691       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1692         myrstart = rstart[proc];
1693         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1694         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1695         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1696         break;
1697       }
1698     }
1699 
1700     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1701   }
1702   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1703   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1704   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1705   PetscCall(ISDestroy(&is_from));
1706   PetscCall(ISDestroy(&is_to));
1707   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1708   PetscCall(MatDenseRestoreArray(X, &array));
1709 
1710   /* free spaces */
1711   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1712   mumps->id.isol_loc = isol_loc_save;
1713 
1714   PetscCall(PetscFree2(sol_loc, isol_loc));
1715   PetscCall(PetscFree(idxx));
1716   PetscCall(VecDestroy(&msol_loc));
1717   PetscCall(VecDestroy(&v_mpi));
1718   if (!denseB) {
1719     if (!mumps->myid) {
1720       b = (Mat_MPIAIJ *)Bt->data;
1721       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1722       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1723       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1724     }
1725   } else {
1726     if (mumps->ICNTL20 == 0) {
1727       PetscCall(VecDestroy(&b_seq));
1728       PetscCall(VecScatterDestroy(&scat_rhs));
1729     }
1730   }
1731   PetscCall(VecScatterDestroy(&scat_sol));
1732   PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1733   PetscFunctionReturn(PETSC_SUCCESS);
1734 }
1735 
1736 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1737 {
1738   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1739   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1740 
1741   PetscFunctionBegin;
1742   mumps->id.ICNTL(9) = 0;
1743   PetscCall(MatMatSolve_MUMPS(A, B, X));
1744   mumps->id.ICNTL(9) = value;
1745   PetscFunctionReturn(PETSC_SUCCESS);
1746 }
1747 
1748 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1749 {
1750   PetscBool flg;
1751   Mat       B;
1752 
1753   PetscFunctionBegin;
1754   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1755   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1756 
1757   /* Create B=Bt^T that uses Bt's data structure */
1758   PetscCall(MatCreateTranspose(Bt, &B));
1759 
1760   PetscCall(MatMatSolve_MUMPS(A, B, X));
1761   PetscCall(MatDestroy(&B));
1762   PetscFunctionReturn(PETSC_SUCCESS);
1763 }
1764 
1765 #if !defined(PETSC_USE_COMPLEX)
1766 /*
1767   input:
1768    F:        numeric factor
1769   output:
1770    nneg:     total number of negative pivots
1771    nzero:    total number of zero pivots
1772    npos:     (global dimension of F) - nneg - nzero
1773 */
1774 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1775 {
1776   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1777   PetscMPIInt size;
1778 
1779   PetscFunctionBegin;
1780   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1781   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1782   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1783 
1784   if (nneg) *nneg = mumps->id.INFOG(12);
1785   if (nzero || npos) {
1786     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1787     if (nzero) *nzero = mumps->id.INFOG(28);
1788     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1789   }
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 #endif
1793 
1794 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1795 {
1796   PetscInt       i, nreqs;
1797   PetscMUMPSInt *irn, *jcn;
1798   PetscMPIInt    count;
1799   PetscInt64     totnnz, remain;
1800   const PetscInt osize = mumps->omp_comm_size;
1801   PetscScalar   *val;
1802 
1803   PetscFunctionBegin;
1804   if (osize > 1) {
1805     if (reuse == MAT_INITIAL_MATRIX) {
1806       /* master first gathers counts of nonzeros to receive */
1807       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1808       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1809 
1810       /* Then each computes number of send/recvs */
1811       if (mumps->is_omp_master) {
1812         /* Start from 1 since self communication is not done in MPI */
1813         nreqs = 0;
1814         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1815       } else {
1816         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1817       }
1818       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1819 
1820       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1821          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1822          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1823          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1824        */
1825       nreqs = 0; /* counter for actual send/recvs */
1826       if (mumps->is_omp_master) {
1827         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1828         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1829         PetscCall(PetscMalloc1(totnnz, &val));
1830 
1831         /* Self communication */
1832         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1833         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1834         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1835 
1836         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1837         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1838         PetscCall(PetscFree(mumps->val_alloc));
1839         mumps->nnz = totnnz;
1840         mumps->irn = irn;
1841         mumps->jcn = jcn;
1842         mumps->val = mumps->val_alloc = val;
1843 
1844         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1845         jcn += mumps->recvcount[0];
1846         val += mumps->recvcount[0];
1847 
1848         /* Remote communication */
1849         for (i = 1; i < osize; i++) {
1850           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1851           remain = mumps->recvcount[i] - count;
1852           while (count > 0) {
1853             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1854             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1855             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1856             irn += count;
1857             jcn += count;
1858             val += count;
1859             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1860             remain -= count;
1861           }
1862         }
1863       } else {
1864         irn    = mumps->irn;
1865         jcn    = mumps->jcn;
1866         val    = mumps->val;
1867         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1868         remain = mumps->nnz - count;
1869         while (count > 0) {
1870           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1871           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1872           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1873           irn += count;
1874           jcn += count;
1875           val += count;
1876           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1877           remain -= count;
1878         }
1879       }
1880     } else {
1881       nreqs = 0;
1882       if (mumps->is_omp_master) {
1883         val = mumps->val + mumps->recvcount[0];
1884         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1885           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1886           remain = mumps->recvcount[i] - count;
1887           while (count > 0) {
1888             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1889             val += count;
1890             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1891             remain -= count;
1892           }
1893         }
1894       } else {
1895         val    = mumps->val;
1896         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1897         remain = mumps->nnz - count;
1898         while (count > 0) {
1899           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1900           val += count;
1901           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1902           remain -= count;
1903         }
1904       }
1905     }
1906     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1907     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1908   }
1909   PetscFunctionReturn(PETSC_SUCCESS);
1910 }
1911 
1912 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1913 {
1914   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1915   PetscBool  isMPIAIJ;
1916 
1917   PetscFunctionBegin;
1918   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1919     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1920     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1921     PetscFunctionReturn(PETSC_SUCCESS);
1922   }
1923 
1924   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1925   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1926 
1927   /* numerical factorization phase */
1928   mumps->id.job = JOB_FACTNUMERIC;
1929   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1930     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1931   } else {
1932     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1933   }
1934   PetscMUMPS_c(mumps);
1935   if (mumps->id.INFOG(1) < 0) {
1936     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1937     if (mumps->id.INFOG(1) == -10) {
1938       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1939       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1940     } else if (mumps->id.INFOG(1) == -13) {
1941       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1942       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1943     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1944       PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1945       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1946     } else {
1947       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1948       F->factorerrortype = MAT_FACTOR_OTHER;
1949     }
1950   }
1951   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
1952 
1953   F->assembled = PETSC_TRUE;
1954 
1955   if (F->schur) { /* reset Schur status to unfactored */
1956 #if defined(PETSC_HAVE_CUDA)
1957     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1958 #endif
1959     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1960       mumps->id.ICNTL(19) = 2;
1961       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1962     }
1963     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1964   }
1965 
1966   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1967   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1968 
1969   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1970   if (mumps->petsc_size > 1) {
1971     PetscInt     lsol_loc;
1972     PetscScalar *sol_loc;
1973 
1974     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1975 
1976     /* distributed solution; Create x_seq=sol_loc for repeated use */
1977     if (mumps->x_seq) {
1978       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1979       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1980       PetscCall(VecDestroy(&mumps->x_seq));
1981     }
1982     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1983     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1984     mumps->id.lsol_loc = lsol_loc;
1985     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1986     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1987   }
1988   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1989   PetscFunctionReturn(PETSC_SUCCESS);
1990 }
1991 
1992 /* Sets MUMPS options from the options database */
1993 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1994 {
1995   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1996   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1997   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1998   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1999   MumpsScalar  *arr;
2000 
2001   PetscFunctionBegin;
2002   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2003   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2004     PetscInt nthreads   = 0;
2005     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2006     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2007 
2008     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2009     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2010     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2011 
2012     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2013     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2014     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2015     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2016     if (mumps->use_petsc_omp_support) {
2017       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2018                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2019       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2020 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2021       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2022       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2023 #endif
2024     } else {
2025       mumps->omp_comm      = PETSC_COMM_SELF;
2026       mumps->mumps_comm    = mumps->petsc_comm;
2027       mumps->is_omp_master = PETSC_TRUE;
2028     }
2029     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2030     mumps->reqs = NULL;
2031     mumps->tag  = 0;
2032 
2033     if (mumps->mumps_comm != MPI_COMM_NULL) {
2034       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2035         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2036         MPI_Comm comm;
2037         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2038         mumps->mumps_comm = comm;
2039       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2040     }
2041 
2042     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2043     mumps->id.job          = JOB_INIT;
2044     mumps->id.par          = 1; /* host participates factorizaton and solve */
2045     mumps->id.sym          = mumps->sym;
2046 
2047     size          = mumps->id.size_schur;
2048     arr           = mumps->id.schur;
2049     listvar_schur = mumps->id.listvar_schur;
2050     PetscMUMPS_c(mumps);
2051     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2052     /* restore cached ICNTL and CNTL values */
2053     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2054     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2055     PetscCall(PetscFree(mumps->ICNTL_pre));
2056     PetscCall(PetscFree(mumps->CNTL_pre));
2057 
2058     if (schur) {
2059       mumps->id.size_schur    = size;
2060       mumps->id.schur_lld     = size;
2061       mumps->id.schur         = arr;
2062       mumps->id.listvar_schur = listvar_schur;
2063       if (mumps->petsc_size > 1) {
2064         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2065 
2066         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2067         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2068         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2069         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2070       } else {
2071         if (F->factortype == MAT_FACTOR_LU) {
2072           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2073         } else {
2074           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2075         }
2076       }
2077       mumps->id.ICNTL(26) = -1;
2078     }
2079 
2080     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2081        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2082      */
2083     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2084     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2085 
2086     mumps->scat_rhs = NULL;
2087     mumps->scat_sol = NULL;
2088 
2089     /* set PETSc-MUMPS default options - override MUMPS default */
2090     mumps->id.ICNTL(3) = 0;
2091     mumps->id.ICNTL(4) = 0;
2092     if (mumps->petsc_size == 1) {
2093       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2094       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2095     } else {
2096       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2097       mumps->id.ICNTL(21) = 1; /* distributed solution */
2098     }
2099   }
2100   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2101   if (flg) mumps->id.ICNTL(1) = icntl;
2102   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2103   if (flg) mumps->id.ICNTL(2) = icntl;
2104   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2105   if (flg) mumps->id.ICNTL(3) = icntl;
2106 
2107   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2108   if (flg) mumps->id.ICNTL(4) = icntl;
2109   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2110 
2111   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2112   if (flg) mumps->id.ICNTL(6) = icntl;
2113 
2114   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2115   if (flg) {
2116     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2117     mumps->id.ICNTL(7) = icntl;
2118   }
2119 
2120   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2121   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2122   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2123   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2124   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2125   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2126   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2127   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2128   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2129   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2130   if (flg) {
2131     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2132     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2133   }
2134   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2135   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2136     PetscCall(MatDestroy(&F->schur));
2137     PetscCall(MatMumpsResetSchur_Private(mumps));
2138   }
2139 
2140   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2141      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2142      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2143      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2144      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2145      In short, we could not use distributed RHS with MPICH until v4.0b1.
2146    */
2147 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2148   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
2149 #else
2150   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
2151 #endif
2152   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2153   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2154 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2155   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2156 #endif
2157   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2158 
2159   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2160   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2161   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2162   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2163 
2164   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2165   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2166   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2167   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2168   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2169   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2170   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2171   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2172   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2173   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2174   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2175   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2176   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2177 
2178   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2179   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2180   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2181   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2182   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2183   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2184 
2185   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2186 
2187   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2188   if (ninfo) {
2189     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2190     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2191     mumps->ninfo = ninfo;
2192     for (i = 0; i < ninfo; i++) {
2193       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2194       mumps->info[i] = info[i];
2195     }
2196   }
2197   PetscOptionsEnd();
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2202 {
2203   PetscFunctionBegin;
2204   if (mumps->id.INFOG(1) < 0) {
2205     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2206     if (mumps->id.INFOG(1) == -6) {
2207       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2208       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2209     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2210       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2211       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2212     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
2213       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2214     } else {
2215       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2216       F->factorerrortype = MAT_FACTOR_OTHER;
2217     }
2218   }
2219   PetscFunctionReturn(PETSC_SUCCESS);
2220 }
2221 
2222 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2223 {
2224   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2225   Vec            b;
2226   const PetscInt M = A->rmap->N;
2227 
2228   PetscFunctionBegin;
2229   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2230     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2231     PetscFunctionReturn(PETSC_SUCCESS);
2232   }
2233 
2234   /* Set MUMPS options from the options database */
2235   PetscCall(MatSetFromOptions_MUMPS(F, A));
2236 
2237   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2238   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2239 
2240   /* analysis phase */
2241   mumps->id.job = JOB_FACTSYMBOLIC;
2242   mumps->id.n   = M;
2243   switch (mumps->id.ICNTL(18)) {
2244   case 0: /* centralized assembled matrix input */
2245     if (!mumps->myid) {
2246       mumps->id.nnz = mumps->nnz;
2247       mumps->id.irn = mumps->irn;
2248       mumps->id.jcn = mumps->jcn;
2249       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2250       if (r) {
2251         mumps->id.ICNTL(7) = 1;
2252         if (!mumps->myid) {
2253           const PetscInt *idx;
2254           PetscInt        i;
2255 
2256           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2257           PetscCall(ISGetIndices(r, &idx));
2258           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2259           PetscCall(ISRestoreIndices(r, &idx));
2260         }
2261       }
2262     }
2263     break;
2264   case 3: /* distributed assembled matrix input (size>1) */
2265     mumps->id.nnz_loc = mumps->nnz;
2266     mumps->id.irn_loc = mumps->irn;
2267     mumps->id.jcn_loc = mumps->jcn;
2268     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2269     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2270       PetscCall(MatCreateVecs(A, NULL, &b));
2271       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2272       PetscCall(VecDestroy(&b));
2273     }
2274     break;
2275   }
2276   PetscMUMPS_c(mumps);
2277   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2278 
2279   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2280   F->ops->solve             = MatSolve_MUMPS;
2281   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2282   F->ops->matsolve          = MatMatSolve_MUMPS;
2283   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2284   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2285 
2286   mumps->matstruc = SAME_NONZERO_PATTERN;
2287   PetscFunctionReturn(PETSC_SUCCESS);
2288 }
2289 
2290 /* Note the Petsc r and c permutations are ignored */
2291 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2292 {
2293   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2294   Vec            b;
2295   const PetscInt M = A->rmap->N;
2296 
2297   PetscFunctionBegin;
2298   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2299     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2300     PetscFunctionReturn(PETSC_SUCCESS);
2301   }
2302 
2303   /* Set MUMPS options from the options database */
2304   PetscCall(MatSetFromOptions_MUMPS(F, A));
2305 
2306   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2307   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2308 
2309   /* analysis phase */
2310   mumps->id.job = JOB_FACTSYMBOLIC;
2311   mumps->id.n   = M;
2312   switch (mumps->id.ICNTL(18)) {
2313   case 0: /* centralized assembled matrix input */
2314     if (!mumps->myid) {
2315       mumps->id.nnz = mumps->nnz;
2316       mumps->id.irn = mumps->irn;
2317       mumps->id.jcn = mumps->jcn;
2318       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2319     }
2320     break;
2321   case 3: /* distributed assembled matrix input (size>1) */
2322     mumps->id.nnz_loc = mumps->nnz;
2323     mumps->id.irn_loc = mumps->irn;
2324     mumps->id.jcn_loc = mumps->jcn;
2325     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2326     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2327       PetscCall(MatCreateVecs(A, NULL, &b));
2328       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2329       PetscCall(VecDestroy(&b));
2330     }
2331     break;
2332   }
2333   PetscMUMPS_c(mumps);
2334   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2335 
2336   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2337   F->ops->solve             = MatSolve_MUMPS;
2338   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2339   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2340 
2341   mumps->matstruc = SAME_NONZERO_PATTERN;
2342   PetscFunctionReturn(PETSC_SUCCESS);
2343 }
2344 
2345 /* Note the Petsc r permutation and factor info are ignored */
2346 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2347 {
2348   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2349   Vec            b;
2350   const PetscInt M = A->rmap->N;
2351 
2352   PetscFunctionBegin;
2353   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2354     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   }
2357 
2358   /* Set MUMPS options from the options database */
2359   PetscCall(MatSetFromOptions_MUMPS(F, A));
2360 
2361   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2362   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2363 
2364   /* analysis phase */
2365   mumps->id.job = JOB_FACTSYMBOLIC;
2366   mumps->id.n   = M;
2367   switch (mumps->id.ICNTL(18)) {
2368   case 0: /* centralized assembled matrix input */
2369     if (!mumps->myid) {
2370       mumps->id.nnz = mumps->nnz;
2371       mumps->id.irn = mumps->irn;
2372       mumps->id.jcn = mumps->jcn;
2373       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2374     }
2375     break;
2376   case 3: /* distributed assembled matrix input (size>1) */
2377     mumps->id.nnz_loc = mumps->nnz;
2378     mumps->id.irn_loc = mumps->irn;
2379     mumps->id.jcn_loc = mumps->jcn;
2380     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2381     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2382       PetscCall(MatCreateVecs(A, NULL, &b));
2383       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2384       PetscCall(VecDestroy(&b));
2385     }
2386     break;
2387   }
2388   PetscMUMPS_c(mumps);
2389   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2390 
2391   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2392   F->ops->solve                 = MatSolve_MUMPS;
2393   F->ops->solvetranspose        = MatSolve_MUMPS;
2394   F->ops->matsolve              = MatMatSolve_MUMPS;
2395   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2396   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2397 #if defined(PETSC_USE_COMPLEX)
2398   F->ops->getinertia = NULL;
2399 #else
2400   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2401 #endif
2402 
2403   mumps->matstruc = SAME_NONZERO_PATTERN;
2404   PetscFunctionReturn(PETSC_SUCCESS);
2405 }
2406 
2407 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2408 {
2409   PetscBool         iascii;
2410   PetscViewerFormat format;
2411   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2412 
2413   PetscFunctionBegin;
2414   /* check if matrix is mumps type */
2415   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2416 
2417   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2418   if (iascii) {
2419     PetscCall(PetscViewerGetFormat(viewer, &format));
2420     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2421       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2422       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2423         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2424         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2425         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2426         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2427         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2428         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2429         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2430         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2431         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2432         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2433         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2434         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2435         if (mumps->id.ICNTL(11) > 0) {
2436           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2437           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2438           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2439           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2440           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2441           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2442         }
2443         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2444         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2445         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2446         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2447         /* ICNTL(15-17) not used */
2448         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2449         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2450         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2451         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2452         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2453         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2454 
2455         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2456         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2457         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2458         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2459         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2460         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2461 
2462         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2463         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2464         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2465         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2466         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2467         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2468         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2469 
2470         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2471         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2472         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2473         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2474         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2475         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2476 
2477         /* information local to each processor */
2478         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2479         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2480         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2481         PetscCall(PetscViewerFlush(viewer));
2482         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2483         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2484         PetscCall(PetscViewerFlush(viewer));
2485         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2486         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2487         PetscCall(PetscViewerFlush(viewer));
2488 
2489         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2490         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2491         PetscCall(PetscViewerFlush(viewer));
2492 
2493         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2494         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2495         PetscCall(PetscViewerFlush(viewer));
2496 
2497         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2498         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2499         PetscCall(PetscViewerFlush(viewer));
2500 
2501         if (mumps->ninfo && mumps->ninfo <= 80) {
2502           PetscInt i;
2503           for (i = 0; i < mumps->ninfo; i++) {
2504             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2505             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2506             PetscCall(PetscViewerFlush(viewer));
2507           }
2508         }
2509         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2510       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2511 
2512       if (mumps->myid == 0) { /* information from the host */
2513         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2514         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2515         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2516         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2517 
2518         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2519         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2520         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2521         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2522         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2523         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2524         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2525         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2526         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2527         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2528         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2530         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2533         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2534         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2535         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2537         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2540         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2541         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2543         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2551         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2552       }
2553     }
2554   }
2555   PetscFunctionReturn(PETSC_SUCCESS);
2556 }
2557 
2558 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2559 {
2560   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2561 
2562   PetscFunctionBegin;
2563   info->block_size        = 1.0;
2564   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2565   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2566   info->nz_unneeded       = 0.0;
2567   info->assemblies        = 0.0;
2568   info->mallocs           = 0.0;
2569   info->memory            = 0.0;
2570   info->fill_ratio_given  = 0;
2571   info->fill_ratio_needed = 0;
2572   info->factor_mallocs    = 0;
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2577 {
2578   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2579   const PetscScalar *arr;
2580   const PetscInt    *idxs;
2581   PetscInt           size, i;
2582 
2583   PetscFunctionBegin;
2584   PetscCall(ISGetLocalSize(is, &size));
2585   /* Schur complement matrix */
2586   PetscCall(MatDestroy(&F->schur));
2587   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2588   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2589   mumps->id.schur      = (MumpsScalar *)arr;
2590   mumps->id.size_schur = size;
2591   mumps->id.schur_lld  = size;
2592   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2593   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2594 
2595   /* MUMPS expects Fortran style indices */
2596   PetscCall(PetscFree(mumps->id.listvar_schur));
2597   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2598   PetscCall(ISGetIndices(is, &idxs));
2599   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2600   PetscCall(ISRestoreIndices(is, &idxs));
2601   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2602   mumps->id.ICNTL(26) = -1;
2603   PetscFunctionReturn(PETSC_SUCCESS);
2604 }
2605 
2606 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2607 {
2608   Mat          St;
2609   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2610   PetscScalar *array;
2611 
2612   PetscFunctionBegin;
2613   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2614   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2615   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2616   PetscCall(MatSetType(St, MATDENSE));
2617   PetscCall(MatSetUp(St));
2618   PetscCall(MatDenseGetArray(St, &array));
2619   if (!mumps->sym) {                /* MUMPS always return a full matrix */
2620     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2621       PetscInt i, j, N = mumps->id.size_schur;
2622       for (i = 0; i < N; i++) {
2623         for (j = 0; j < N; j++) {
2624 #if !defined(PETSC_USE_COMPLEX)
2625           PetscScalar val = mumps->id.schur[i * N + j];
2626 #else
2627           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2628 #endif
2629           array[j * N + i] = val;
2630         }
2631       }
2632     } else { /* stored by columns */
2633       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2634     }
2635   } else {                          /* either full or lower-triangular (not packed) */
2636     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2637       PetscInt i, j, N = mumps->id.size_schur;
2638       for (i = 0; i < N; i++) {
2639         for (j = i; j < N; j++) {
2640 #if !defined(PETSC_USE_COMPLEX)
2641           PetscScalar val = mumps->id.schur[i * N + j];
2642 #else
2643           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2644 #endif
2645           array[i * N + j] = array[j * N + i] = val;
2646         }
2647       }
2648     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2649       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2650     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2651       PetscInt i, j, N = mumps->id.size_schur;
2652       for (i = 0; i < N; i++) {
2653         for (j = 0; j < i + 1; j++) {
2654 #if !defined(PETSC_USE_COMPLEX)
2655           PetscScalar val = mumps->id.schur[i * N + j];
2656 #else
2657           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2658 #endif
2659           array[i * N + j] = array[j * N + i] = val;
2660         }
2661       }
2662     }
2663   }
2664   PetscCall(MatDenseRestoreArray(St, &array));
2665   *S = St;
2666   PetscFunctionReturn(PETSC_SUCCESS);
2667 }
2668 
2669 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2670 {
2671   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2672 
2673   PetscFunctionBegin;
2674   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2675     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2676     for (i = 0; i < nICNTL_pre; ++i)
2677       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2678     if (i == nICNTL_pre) {                             /* not already cached */
2679       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2680       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2681       mumps->ICNTL_pre[0]++;
2682     }
2683     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2684     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2685   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2690 {
2691   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2692 
2693   PetscFunctionBegin;
2694   if (mumps->id.job == JOB_NULL) {
2695     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2696     *ival = 0;
2697     for (i = 0; i < nICNTL_pre; ++i) {
2698       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2699     }
2700   } else *ival = mumps->id.ICNTL(icntl);
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 /*@
2705   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2706 
2707   Logically Collective
2708 
2709   Input Parameters:
2710 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2711 . icntl - index of MUMPS parameter array ICNTL()
2712 - ival  - value of MUMPS ICNTL(icntl)
2713 
2714   Options Database Key:
2715 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2716 
2717   Level: beginner
2718 
2719   References:
2720 .  * - MUMPS Users' Guide
2721 
2722 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2723 @*/
2724 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2725 {
2726   PetscFunctionBegin;
2727   PetscValidType(F, 1);
2728   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2729   PetscValidLogicalCollectiveInt(F, icntl, 2);
2730   PetscValidLogicalCollectiveInt(F, ival, 3);
2731   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2732   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2733   PetscFunctionReturn(PETSC_SUCCESS);
2734 }
2735 
2736 /*@
2737   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2738 
2739   Logically Collective
2740 
2741   Input Parameters:
2742 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2743 - icntl - index of MUMPS parameter array ICNTL()
2744 
2745   Output Parameter:
2746 . ival - value of MUMPS ICNTL(icntl)
2747 
2748   Level: beginner
2749 
2750   References:
2751 .  * - MUMPS Users' Guide
2752 
2753 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2754 @*/
2755 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2756 {
2757   PetscFunctionBegin;
2758   PetscValidType(F, 1);
2759   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2760   PetscValidLogicalCollectiveInt(F, icntl, 2);
2761   PetscAssertPointer(ival, 3);
2762   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2763   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2764   PetscFunctionReturn(PETSC_SUCCESS);
2765 }
2766 
2767 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2768 {
2769   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2770 
2771   PetscFunctionBegin;
2772   if (mumps->id.job == JOB_NULL) {
2773     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2774     for (i = 0; i < nCNTL_pre; ++i)
2775       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2776     if (i == nCNTL_pre) {
2777       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2778       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2779       mumps->CNTL_pre[0]++;
2780     }
2781     mumps->CNTL_pre[1 + 2 * i] = icntl;
2782     mumps->CNTL_pre[2 + 2 * i] = val;
2783   } else mumps->id.CNTL(icntl) = val;
2784   PetscFunctionReturn(PETSC_SUCCESS);
2785 }
2786 
2787 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2788 {
2789   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2790 
2791   PetscFunctionBegin;
2792   if (mumps->id.job == JOB_NULL) {
2793     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2794     *val = 0.0;
2795     for (i = 0; i < nCNTL_pre; ++i) {
2796       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2797     }
2798   } else *val = mumps->id.CNTL(icntl);
2799   PetscFunctionReturn(PETSC_SUCCESS);
2800 }
2801 
2802 /*@
2803   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2804 
2805   Logically Collective
2806 
2807   Input Parameters:
2808 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2809 . icntl - index of MUMPS parameter array CNTL()
2810 - val   - value of MUMPS CNTL(icntl)
2811 
2812   Options Database Key:
2813 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2814 
2815   Level: beginner
2816 
2817   References:
2818 .  * - MUMPS Users' Guide
2819 
2820 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2821 @*/
2822 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2823 {
2824   PetscFunctionBegin;
2825   PetscValidType(F, 1);
2826   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2827   PetscValidLogicalCollectiveInt(F, icntl, 2);
2828   PetscValidLogicalCollectiveReal(F, val, 3);
2829   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2830   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2831   PetscFunctionReturn(PETSC_SUCCESS);
2832 }
2833 
2834 /*@
2835   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2836 
2837   Logically Collective
2838 
2839   Input Parameters:
2840 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2841 - icntl - index of MUMPS parameter array CNTL()
2842 
2843   Output Parameter:
2844 . val - value of MUMPS CNTL(icntl)
2845 
2846   Level: beginner
2847 
2848   References:
2849 .  * - MUMPS Users' Guide
2850 
2851 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2852 @*/
2853 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2854 {
2855   PetscFunctionBegin;
2856   PetscValidType(F, 1);
2857   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2858   PetscValidLogicalCollectiveInt(F, icntl, 2);
2859   PetscAssertPointer(val, 3);
2860   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2861   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2862   PetscFunctionReturn(PETSC_SUCCESS);
2863 }
2864 
2865 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2866 {
2867   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2868 
2869   PetscFunctionBegin;
2870   *info = mumps->id.INFO(icntl);
2871   PetscFunctionReturn(PETSC_SUCCESS);
2872 }
2873 
2874 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2875 {
2876   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2877 
2878   PetscFunctionBegin;
2879   *infog = mumps->id.INFOG(icntl);
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882 
2883 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2884 {
2885   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2886 
2887   PetscFunctionBegin;
2888   *rinfo = mumps->id.RINFO(icntl);
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2893 {
2894   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2895 
2896   PetscFunctionBegin;
2897   *rinfog = mumps->id.RINFOG(icntl);
2898   PetscFunctionReturn(PETSC_SUCCESS);
2899 }
2900 
2901 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2902 {
2903   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2904 
2905   PetscFunctionBegin;
2906   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2907   *size  = 0;
2908   *array = NULL;
2909   if (!mumps->myid) {
2910     *size = mumps->id.INFOG(28);
2911     PetscCall(PetscMalloc1(*size, array));
2912     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2913   }
2914   PetscFunctionReturn(PETSC_SUCCESS);
2915 }
2916 
2917 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2918 {
2919   Mat          Bt = NULL, Btseq = NULL;
2920   PetscBool    flg;
2921   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2922   PetscScalar *aa;
2923   PetscInt     spnr, *ia, *ja, M, nrhs;
2924 
2925   PetscFunctionBegin;
2926   PetscAssertPointer(spRHS, 2);
2927   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2928   if (flg) {
2929     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2930   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
2931 
2932   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2933 
2934   if (mumps->petsc_size > 1) {
2935     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2936     Btseq         = b->A;
2937   } else {
2938     Btseq = Bt;
2939   }
2940 
2941   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2942   mumps->id.nrhs = nrhs;
2943   mumps->id.lrhs = M;
2944   mumps->id.rhs  = NULL;
2945 
2946   if (!mumps->myid) {
2947     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2948     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2949     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2950     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2951     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2952   } else {
2953     mumps->id.irhs_ptr    = NULL;
2954     mumps->id.irhs_sparse = NULL;
2955     mumps->id.nz_rhs      = 0;
2956     mumps->id.rhs_sparse  = NULL;
2957   }
2958   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2959   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2960 
2961   /* solve phase */
2962   mumps->id.job = JOB_SOLVE;
2963   PetscMUMPS_c(mumps);
2964   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2965 
2966   if (!mumps->myid) {
2967     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2968     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2969     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2970   }
2971   PetscFunctionReturn(PETSC_SUCCESS);
2972 }
2973 
2974 /*@
2975   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2976 
2977   Logically Collective
2978 
2979   Input Parameter:
2980 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2981 
2982   Output Parameter:
2983 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2984 
2985   Level: beginner
2986 
2987   References:
2988 .  * - MUMPS Users' Guide
2989 
2990 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2991 @*/
2992 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2993 {
2994   PetscFunctionBegin;
2995   PetscValidType(F, 1);
2996   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2997   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2998   PetscFunctionReturn(PETSC_SUCCESS);
2999 }
3000 
3001 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3002 {
3003   Mat spRHS;
3004 
3005   PetscFunctionBegin;
3006   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3007   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3008   PetscCall(MatDestroy(&spRHS));
3009   PetscFunctionReturn(PETSC_SUCCESS);
3010 }
3011 
3012 /*@
3013   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
3014 
3015   Logically Collective
3016 
3017   Input Parameter:
3018 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3019 
3020   Output Parameter:
3021 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3022 
3023   Level: beginner
3024 
3025   References:
3026 .  * - MUMPS Users' Guide
3027 
3028 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3029 @*/
3030 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3031 {
3032   PetscBool flg;
3033 
3034   PetscFunctionBegin;
3035   PetscValidType(F, 1);
3036   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3037   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3038   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3039 
3040   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3041   PetscFunctionReturn(PETSC_SUCCESS);
3042 }
3043 
3044 /*@
3045   MatMumpsGetInfo - Get MUMPS parameter INFO()
3046 
3047   Logically Collective
3048 
3049   Input Parameters:
3050 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3051 - icntl - index of MUMPS parameter array INFO()
3052 
3053   Output Parameter:
3054 . ival - value of MUMPS INFO(icntl)
3055 
3056   Level: beginner
3057 
3058   References:
3059 .  * - MUMPS Users' Guide
3060 
3061 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3062 @*/
3063 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3064 {
3065   PetscFunctionBegin;
3066   PetscValidType(F, 1);
3067   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3068   PetscAssertPointer(ival, 3);
3069   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3070   PetscFunctionReturn(PETSC_SUCCESS);
3071 }
3072 
3073 /*@
3074   MatMumpsGetInfog - Get MUMPS parameter INFOG()
3075 
3076   Logically Collective
3077 
3078   Input Parameters:
3079 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3080 - icntl - index of MUMPS parameter array INFOG()
3081 
3082   Output Parameter:
3083 . ival - value of MUMPS INFOG(icntl)
3084 
3085   Level: beginner
3086 
3087   References:
3088 .  * - MUMPS Users' Guide
3089 
3090 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3091 @*/
3092 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3093 {
3094   PetscFunctionBegin;
3095   PetscValidType(F, 1);
3096   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3097   PetscAssertPointer(ival, 3);
3098   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3099   PetscFunctionReturn(PETSC_SUCCESS);
3100 }
3101 
3102 /*@
3103   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
3104 
3105   Logically Collective
3106 
3107   Input Parameters:
3108 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3109 - icntl - index of MUMPS parameter array RINFO()
3110 
3111   Output Parameter:
3112 . val - value of MUMPS RINFO(icntl)
3113 
3114   Level: beginner
3115 
3116   References:
3117 .  * - MUMPS Users' Guide
3118 
3119 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3120 @*/
3121 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3122 {
3123   PetscFunctionBegin;
3124   PetscValidType(F, 1);
3125   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3126   PetscAssertPointer(val, 3);
3127   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3128   PetscFunctionReturn(PETSC_SUCCESS);
3129 }
3130 
3131 /*@
3132   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
3133 
3134   Logically Collective
3135 
3136   Input Parameters:
3137 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3138 - icntl - index of MUMPS parameter array RINFOG()
3139 
3140   Output Parameter:
3141 . val - value of MUMPS RINFOG(icntl)
3142 
3143   Level: beginner
3144 
3145   References:
3146 .  * - MUMPS Users' Guide
3147 
3148 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3149 @*/
3150 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3151 {
3152   PetscFunctionBegin;
3153   PetscValidType(F, 1);
3154   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3155   PetscAssertPointer(val, 3);
3156   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3157   PetscFunctionReturn(PETSC_SUCCESS);
3158 }
3159 
3160 /*@
3161   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
3162 
3163   Logically Collective
3164 
3165   Input Parameter:
3166 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3167 
3168   Output Parameters:
3169 + size  - local size of the array. The size of the array is non-zero only on the host.
3170 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3171            for freeing this array.
3172 
3173   Level: beginner
3174 
3175   References:
3176 .  * - MUMPS Users' Guide
3177 
3178 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3179 @*/
3180 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3181 {
3182   PetscFunctionBegin;
3183   PetscValidType(F, 1);
3184   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3185   PetscAssertPointer(size, 2);
3186   PetscAssertPointer(array, 3);
3187   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3188   PetscFunctionReturn(PETSC_SUCCESS);
3189 }
3190 
3191 /*MC
3192   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3193   distributed and sequential matrices via the external package MUMPS.
3194 
3195   Works with `MATAIJ` and `MATSBAIJ` matrices
3196 
3197   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3198 
3199   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3200   See details below.
3201 
3202   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3203 
3204   Options Database Keys:
3205 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3206 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3207 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
3208 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
3209 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3210 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3211                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3212 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3213 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3214 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3215 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3216 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3217 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3218 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3219 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3220 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3221 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3222 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3223 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3224 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3225 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
3226 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
3227 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3228 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3229 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3230 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3231 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3232 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3233 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3234 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3235 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
3236 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
3237 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3238 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3239 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3240 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3241 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3242                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3243 
3244   Level: beginner
3245 
3246     Notes:
3247     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3248     error if the matrix is Hermitian.
3249 
3250     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3251     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3252 
3253     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3254     the failure with
3255 .vb
3256           KSPGetPC(ksp,&pc);
3257           PCFactorGetMatrix(pc,&mat);
3258           MatMumpsGetInfo(mat,....);
3259           MatMumpsGetInfog(mat,....); etc.
3260 .ve
3261     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3262 
3263     MUMPS provides 64-bit integer support in two build modes:
3264       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3265       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3266 
3267       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3268       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3269       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3270       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3271 
3272     With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3273 
3274   Two modes to run MUMPS/PETSc with OpenMP
3275 .vb
3276      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3277      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3278 .ve
3279 
3280 .vb
3281      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3282     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3283 .ve
3284 
3285    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3286    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3287    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3288    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3289    (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided).
3290 
3291    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3292    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3293    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3294    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3295    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3296    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3297    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3298    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3299    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3300    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3301    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3302    examine the mapping result.
3303 
3304    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3305    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3306    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3307 
3308    References:
3309 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3310 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3311 
3312 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3313 M*/
3314 
3315 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3316 {
3317   PetscFunctionBegin;
3318   *type = MATSOLVERMUMPS;
3319   PetscFunctionReturn(PETSC_SUCCESS);
3320 }
3321 
3322 /* MatGetFactor for Seq and MPI AIJ matrices */
3323 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3324 {
3325   Mat         B;
3326   Mat_MUMPS  *mumps;
3327   PetscBool   isSeqAIJ;
3328   PetscMPIInt size;
3329 
3330   PetscFunctionBegin;
3331 #if defined(PETSC_USE_COMPLEX)
3332   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3333     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3334     *F = NULL;
3335     PetscFunctionReturn(PETSC_SUCCESS);
3336   }
3337 #endif
3338   /* Create the factorization matrix */
3339   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3340   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3341   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3342   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3343   PetscCall(MatSetUp(B));
3344 
3345   PetscCall(PetscNew(&mumps));
3346 
3347   B->ops->view    = MatView_MUMPS;
3348   B->ops->getinfo = MatGetInfo_MUMPS;
3349 
3350   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3351   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3352   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3353   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3354   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3355   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3356   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3357   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3358   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3359   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3360   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3361   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3362   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3363   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3364 
3365   if (ftype == MAT_FACTOR_LU) {
3366     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3367     B->factortype            = MAT_FACTOR_LU;
3368     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3369     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3370     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3371     mumps->sym = 0;
3372   } else {
3373     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3374     B->factortype                  = MAT_FACTOR_CHOLESKY;
3375     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3376     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3377     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3378 #if defined(PETSC_USE_COMPLEX)
3379     mumps->sym = 2;
3380 #else
3381     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3382     else mumps->sym = 2;
3383 #endif
3384   }
3385 
3386   /* set solvertype */
3387   PetscCall(PetscFree(B->solvertype));
3388   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3389   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3390   if (size == 1) {
3391     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3392     B->canuseordering = PETSC_TRUE;
3393   }
3394   B->ops->destroy = MatDestroy_MUMPS;
3395   B->data         = (void *)mumps;
3396 
3397   *F               = B;
3398   mumps->id.job    = JOB_NULL;
3399   mumps->ICNTL_pre = NULL;
3400   mumps->CNTL_pre  = NULL;
3401   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3402   PetscFunctionReturn(PETSC_SUCCESS);
3403 }
3404 
3405 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3406 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3407 {
3408   Mat         B;
3409   Mat_MUMPS  *mumps;
3410   PetscBool   isSeqSBAIJ;
3411   PetscMPIInt size;
3412 
3413   PetscFunctionBegin;
3414 #if defined(PETSC_USE_COMPLEX)
3415   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3416     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3417     *F = NULL;
3418     PetscFunctionReturn(PETSC_SUCCESS);
3419   }
3420 #endif
3421   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3422   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3423   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3424   PetscCall(MatSetUp(B));
3425 
3426   PetscCall(PetscNew(&mumps));
3427   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3428   if (isSeqSBAIJ) {
3429     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3430   } else {
3431     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3432   }
3433 
3434   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3435   B->ops->view                   = MatView_MUMPS;
3436   B->ops->getinfo                = MatGetInfo_MUMPS;
3437 
3438   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3439   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3440   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3441   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3442   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3443   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3444   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3445   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3446   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3447   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3448   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3449   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3450   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3451   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3452 
3453   B->factortype = MAT_FACTOR_CHOLESKY;
3454 #if defined(PETSC_USE_COMPLEX)
3455   mumps->sym = 2;
3456 #else
3457   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3458   else mumps->sym = 2;
3459 #endif
3460 
3461   /* set solvertype */
3462   PetscCall(PetscFree(B->solvertype));
3463   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3464   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3465   if (size == 1) {
3466     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3467     B->canuseordering = PETSC_TRUE;
3468   }
3469   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3470   B->ops->destroy = MatDestroy_MUMPS;
3471   B->data         = (void *)mumps;
3472 
3473   *F               = B;
3474   mumps->id.job    = JOB_NULL;
3475   mumps->ICNTL_pre = NULL;
3476   mumps->CNTL_pre  = NULL;
3477   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3478   PetscFunctionReturn(PETSC_SUCCESS);
3479 }
3480 
3481 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3482 {
3483   Mat         B;
3484   Mat_MUMPS  *mumps;
3485   PetscBool   isSeqBAIJ;
3486   PetscMPIInt size;
3487 
3488   PetscFunctionBegin;
3489   /* Create the factorization matrix */
3490   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3491   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3492   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3493   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3494   PetscCall(MatSetUp(B));
3495 
3496   PetscCall(PetscNew(&mumps));
3497   if (ftype == MAT_FACTOR_LU) {
3498     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3499     B->factortype            = MAT_FACTOR_LU;
3500     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3501     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3502     mumps->sym = 0;
3503     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3504   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3505 
3506   B->ops->view    = MatView_MUMPS;
3507   B->ops->getinfo = MatGetInfo_MUMPS;
3508 
3509   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3510   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3511   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3512   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3513   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3514   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3515   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3516   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3517   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3518   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3519   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3520   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3521   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3522   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3523 
3524   /* set solvertype */
3525   PetscCall(PetscFree(B->solvertype));
3526   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3527   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3528   if (size == 1) {
3529     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3530     B->canuseordering = PETSC_TRUE;
3531   }
3532   B->ops->destroy = MatDestroy_MUMPS;
3533   B->data         = (void *)mumps;
3534 
3535   *F               = B;
3536   mumps->id.job    = JOB_NULL;
3537   mumps->ICNTL_pre = NULL;
3538   mumps->CNTL_pre  = NULL;
3539   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3540   PetscFunctionReturn(PETSC_SUCCESS);
3541 }
3542 
3543 /* MatGetFactor for Seq and MPI SELL matrices */
3544 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3545 {
3546   Mat         B;
3547   Mat_MUMPS  *mumps;
3548   PetscBool   isSeqSELL;
3549   PetscMPIInt size;
3550 
3551   PetscFunctionBegin;
3552   /* Create the factorization matrix */
3553   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3554   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3555   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3556   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3557   PetscCall(MatSetUp(B));
3558 
3559   PetscCall(PetscNew(&mumps));
3560 
3561   B->ops->view    = MatView_MUMPS;
3562   B->ops->getinfo = MatGetInfo_MUMPS;
3563 
3564   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3565   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3566   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3576 
3577   if (ftype == MAT_FACTOR_LU) {
3578     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3579     B->factortype            = MAT_FACTOR_LU;
3580     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3581     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3582     mumps->sym = 0;
3583     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3584   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3585 
3586   /* set solvertype */
3587   PetscCall(PetscFree(B->solvertype));
3588   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3589   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3590   if (size == 1) {
3591     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3592     B->canuseordering = PETSC_TRUE;
3593   }
3594   B->ops->destroy = MatDestroy_MUMPS;
3595   B->data         = (void *)mumps;
3596 
3597   *F               = B;
3598   mumps->id.job    = JOB_NULL;
3599   mumps->ICNTL_pre = NULL;
3600   mumps->CNTL_pre  = NULL;
3601   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3602   PetscFunctionReturn(PETSC_SUCCESS);
3603 }
3604 
3605 /* MatGetFactor for MATNEST matrices */
3606 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3607 {
3608   Mat         B, **mats;
3609   Mat_MUMPS  *mumps;
3610   PetscInt    nr, nc;
3611   PetscMPIInt size;
3612   PetscBool   flg = PETSC_TRUE;
3613 
3614   PetscFunctionBegin;
3615 #if defined(PETSC_USE_COMPLEX)
3616   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3617     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3618     *F = NULL;
3619     PetscFunctionReturn(PETSC_SUCCESS);
3620   }
3621 #endif
3622 
3623   /* Return if some condition is not satisfied */
3624   *F = NULL;
3625   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3626   if (ftype == MAT_FACTOR_CHOLESKY) {
3627     IS       *rows, *cols;
3628     PetscInt *m, *M;
3629 
3630     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3631     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3632     PetscCall(MatNestGetISs(A, rows, cols));
3633     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3634     if (!flg) {
3635       PetscCall(PetscFree2(rows, cols));
3636       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3637       PetscFunctionReturn(PETSC_SUCCESS);
3638     }
3639     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3640     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3641     for (PetscInt r = 0; flg && r < nr; r++)
3642       for (PetscInt k = r + 1; flg && k < nr; k++)
3643         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3644     PetscCall(PetscFree2(m, M));
3645     PetscCall(PetscFree2(rows, cols));
3646     if (!flg) {
3647       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3648       PetscFunctionReturn(PETSC_SUCCESS);
3649     }
3650   }
3651 
3652   for (PetscInt r = 0; r < nr; r++) {
3653     for (PetscInt c = 0; c < nc; c++) {
3654       Mat       sub = mats[r][c];
3655       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans;
3656 
3657       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3658       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3659       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3660       else {
3661         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3662         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3663       }
3664       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3665       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3666       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3667       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3668       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3669       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3670       if (ftype == MAT_FACTOR_CHOLESKY) {
3671         if (r == c && !isSeqAIJ && !isMPIAIJ && !isSeqSBAIJ && !isMPISBAIJ) {
3672           PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3673           flg = PETSC_FALSE;
3674         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) {
3675           PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3676           flg = PETSC_FALSE;
3677         }
3678       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ) {
3679         PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3680         flg = PETSC_FALSE;
3681       }
3682     }
3683   }
3684   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3685 
3686   /* Create the factorization matrix */
3687   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3688   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3689   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3690   PetscCall(MatSetUp(B));
3691 
3692   PetscCall(PetscNew(&mumps));
3693 
3694   B->ops->view    = MatView_MUMPS;
3695   B->ops->getinfo = MatGetInfo_MUMPS;
3696 
3697   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3698   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3699   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3700   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3701   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3702   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3703   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3704   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3705   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3706   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3707   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3708   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3709   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3710   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3711 
3712   if (ftype == MAT_FACTOR_LU) {
3713     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3714     B->factortype            = MAT_FACTOR_LU;
3715     mumps->sym               = 0;
3716   } else {
3717     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3718     B->factortype                  = MAT_FACTOR_CHOLESKY;
3719 #if defined(PETSC_USE_COMPLEX)
3720     mumps->sym = 2;
3721 #else
3722     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3723     else mumps->sym = 2;
3724 #endif
3725   }
3726   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3727   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3728 
3729   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3730   if (size == 1) {
3731     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3732     B->canuseordering = PETSC_TRUE;
3733   }
3734 
3735   /* set solvertype */
3736   PetscCall(PetscFree(B->solvertype));
3737   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3738   B->ops->destroy = MatDestroy_MUMPS;
3739   B->data         = (void *)mumps;
3740 
3741   *F               = B;
3742   mumps->id.job    = JOB_NULL;
3743   mumps->ICNTL_pre = NULL;
3744   mumps->CNTL_pre  = NULL;
3745   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3746   PetscFunctionReturn(PETSC_SUCCESS);
3747 }
3748 
3749 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3750 {
3751   PetscFunctionBegin;
3752   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3753   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3754   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3755   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3756   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3757   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3758   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3759   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3760   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3761   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3762   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3763   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3764   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3765   PetscFunctionReturn(PETSC_SUCCESS);
3766 }
3767