xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
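/* These map to the MUMPS "job" values: -1 initializes a MUMPS instance, 1/2/3 run the analysis,
   factorization and solve phases respectively, and -2 terminates the instance. JOB_NULL (0) is used
   by this interface to mark an instance on which JOB_INIT has not yet been called. */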
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34 #if defined(PETSC_USE_REAL_SINGLE)
35 #define MUMPS_c cmumps_c
36 #else
37 #define MUMPS_c zmumps_c
38 #endif
39 #else
40 #if defined(PETSC_USE_REAL_SINGLE)
41 #define MUMPS_c smumps_c
42 #else
43 #define MUMPS_c dmumps_c
44 #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
48    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention in PetscMPIInt, PetscBLASInt etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is more reliable */
55 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56 #endif
57 #else
58 #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60 #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
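/* A minimal usage sketch (with hypothetical variables row and shift): convert a PetscInt index before
   handing it to MUMPS. With 64-bit PetscInt, the cast errors out if the value does not fit in a 32-bit MUMPS_INT.

     PetscMUMPSInt mrow;
     PetscCall(PetscMUMPSIntCast(row + shift, &mrow));
*/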
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) {
79   PetscInt  myval;
80   PetscBool myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
84   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
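/* Hypothetical sketch of how the wrapper is meant to be used inside a PetscOptionsBegin/End block,
   mirroring the ICNTL option handling used elsewhere in this interface:

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &icntl, &flg));
     if (flg) mumps->id.ICNTL(14) = icntl;
*/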
89 
90 /* If using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks may run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
97         MUMPS_c(&mumps->id); \
98         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
99       } \
100       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
101       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
102          to each process, so we only Bcast info[1] (an error code) and leave the others (they do not have  \
103          an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.           \
104          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105       */ \
106       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
107       PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
108       PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I)  icntl[(I)-1]
131 #define CNTL(I)   cntl[(I)-1]
132 #define INFOG(I)  infog[(I)-1]
133 #define INFO(I)   info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I)  rinfo[(I)-1]
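/* For example, mumps->id.ICNTL(7) reads/writes icntl[6], so the code here can use the same 1-based
   numbering as the MUMPS users' guide. */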
136 
137 typedef struct Mat_MUMPS Mat_MUMPS;
138 struct Mat_MUMPS {
139 #if defined(PETSC_USE_COMPLEX)
140 #if defined(PETSC_USE_REAL_SINGLE)
141   CMUMPS_STRUC_C id;
142 #else
143   ZMUMPS_STRUC_C id;
144 #endif
145 #else
146 #if defined(PETSC_USE_REAL_SINGLE)
147   SMUMPS_STRUC_C id;
148 #else
149   DMUMPS_STRUC_C id;
150 #endif
151 #endif
152 
153   MatStructure   matstruc;
154   PetscMPIInt    myid, petsc_size;
155   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
156   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
157   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
158   PetscMUMPSInt  sym;
159   MPI_Comm       mumps_comm;
160   PetscMUMPSInt *ICNTL_pre;
161   PetscReal     *CNTL_pre;
162   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
163   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
164   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
165   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
166 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
167   PetscInt    *rhs_nrow, max_nrhs;
168   PetscMPIInt *rhs_recvcounts, *rhs_disps;
169   PetscScalar *rhs_loc, *rhs_recvbuf;
170 #endif
171   Vec            b_seq, x_seq;
172   PetscInt       ninfo, *info; /* which INFO to display */
173   PetscInt       sizeredrhs;
174   PetscScalar   *schur_sol;
175   PetscInt       schur_sizesol;
176   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
177   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
178   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
179 
180   /* stuff used by petsc/mumps OpenMP support*/
181   PetscBool    use_petsc_omp_support;
182   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not give this guarantee) */
183   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
184   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
185   PetscMPIInt  tag, omp_comm_size;
186   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
187   MPI_Request *reqs;
188 };
189 
190 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
191    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
192  */
193 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) {
194   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. nnz is PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
203       mumps->cur_ilen = nrow + 1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
211     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps          = ia;
217   *ja_mumps          = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) {
224   PetscFunctionBegin;
225   PetscCall(PetscFree(mumps->id.listvar_schur));
226   PetscCall(PetscFree(mumps->id.redrhs));
227   PetscCall(PetscFree(mumps->schur_sol));
228   mumps->id.size_schur = 0;
229   mumps->id.schur_lld  = 0;
230   mumps->id.ICNTL(19)  = 0;
231   PetscFunctionReturn(0);
232 }
233 
234 /* solve with rhs in mumps->id.redrhs and return in the same location */
235 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) {
236   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
237   Mat                  S, B, X;
238   MatFactorSchurStatus schurstatus;
239   PetscInt             sizesol;
240 
241   PetscFunctionBegin;
242   PetscCall(MatFactorFactorizeSchurComplement(F));
243   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
244   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
245   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
246 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
247   PetscCall(MatBindToCPU(B, S->boundtocpu));
248 #endif
249   switch (schurstatus) {
250   case MAT_FACTOR_SCHUR_FACTORED: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252     PetscCall(MatBindToCPU(X, S->boundtocpu));
253 #endif
254     if (!mumps->id.ICNTL(9)) { /* transpose solve */
255       PetscCall(MatMatSolveTranspose(S, B, X));
256     } else {
257       PetscCall(MatMatSolve(S, B, X));
258     }
259     break;
260   case MAT_FACTOR_SCHUR_INVERTED:
261     sizesol = mumps->id.nrhs * mumps->id.size_schur;
262     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
263       PetscCall(PetscFree(mumps->schur_sol));
264       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
265       mumps->schur_sizesol = sizesol;
266     }
267     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
268     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
269 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
270     PetscCall(MatBindToCPU(X, S->boundtocpu));
271 #endif
272     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
273     if (!mumps->id.ICNTL(9)) { /* transpose solve */
274       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
275     } else {
276       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
277     }
278     PetscCall(MatProductSetFromOptions(X));
279     PetscCall(MatProductSymbolic(X));
280     PetscCall(MatProductNumeric(X));
281 
282     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
283     break;
284   default: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
285   }
286   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
287   PetscCall(MatDestroy(&B));
288   PetscCall(MatDestroy(&X));
289   PetscFunctionReturn(0);
290 }
291 
292 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) {
293   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
294 
295   PetscFunctionBegin;
296   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
297     PetscFunctionReturn(0);
298   }
299   if (!expansion) { /* prepare for the condensation step */
300     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
301     /* allocate MUMPS internal array to store reduced right-hand sides */
302     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
303       PetscCall(PetscFree(mumps->id.redrhs));
304       mumps->id.lredrhs = mumps->id.size_schur;
305       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
306       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
307     }
308     mumps->id.ICNTL(26) = 1; /* condensation phase */
309   } else {                   /* prepare for the expansion step */
310     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
311     PetscCall(MatMumpsSolveSchur_Private(F));
312     mumps->id.ICNTL(26) = 2; /* expansion phase */
313     PetscMUMPS_c(mumps);
314     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
315     /* restore defaults */
316     mumps->id.ICNTL(26) = -1;
317     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
318     if (mumps->id.nrhs > 1) {
319       PetscCall(PetscFree(mumps->id.redrhs));
320       mumps->id.lredrhs = 0;
321       mumps->sizeredrhs = 0;
322     }
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 /*
328   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
329 
330   input:
331     A       - matrix in aij,baij or sbaij format
332     shift   - 0: C style output triple; 1: Fortran style output triple.
333     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
334               MAT_REUSE_MATRIX:   only the values in v array are updated
335   output:
336     nnz     - dim of r, c, and v (number of local nonzero entries of A)
337     r, c, v - row and col index, matrix values (matrix triples)
338 
339   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
340   freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
341   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
342 
343  */
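/* Illustrative sketch (not part of the interface): with shift = 1 (Fortran-style indices), the 2x2 matrix
     [2 0]
     [5 3]
   stored as AIJ produces nnz = 3 and the triples
     r = {1, 2, 2},  c = {1, 1, 2},  v = {2., 5., 3.}
   which is the COO layout MUMPS expects in irn/jcn/a (or irn_loc/jcn_loc/a_loc in the distributed case). */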
344 
345 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
346   const PetscScalar *av;
347   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
348   PetscInt64         nz, rnz, i, j, k;
349   PetscMUMPSInt     *row, *col;
350   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
351 
352   PetscFunctionBegin;
353   PetscCall(MatSeqAIJGetArrayRead(A, &av));
354   mumps->val = (PetscScalar *)av;
355   if (reuse == MAT_INITIAL_MATRIX) {
356     nz = aa->nz;
357     ai = aa->i;
358     aj = aa->j;
359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
360     for (i = k = 0; i < M; i++) {
361       rnz = ai[i + 1] - ai[i];
362       ajj = aj + ai[i];
363       for (j = 0; j < rnz; j++) {
364         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
365         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
366         k++;
367       }
368     }
369     mumps->irn = row;
370     mumps->jcn = col;
371     mumps->nnz = nz;
372   }
373   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
374   PetscFunctionReturn(0);
375 }
376 
377 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
378   PetscInt64     nz, i, j, k, r;
379   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
380   PetscMUMPSInt *row, *col;
381 
382   PetscFunctionBegin;
383   mumps->val = a->val;
384   if (reuse == MAT_INITIAL_MATRIX) {
385     nz = a->sliidx[a->totalslices];
386     PetscCall(PetscMalloc2(nz, &row, nz, &col));
387     for (i = k = 0; i < a->totalslices; i++) {
388       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
389     }
390     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
391     mumps->irn = row;
392     mumps->jcn = col;
393     mumps->nnz = nz;
394   }
395   PetscFunctionReturn(0);
396 }
397 
398 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
399   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
400   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
401   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
402   PetscInt        bs;
403   PetscMUMPSInt  *row, *col;
404 
405   PetscFunctionBegin;
406   PetscCall(MatGetBlockSize(A, &bs));
407   M          = A->rmap->N / bs;
408   mumps->val = aa->a;
409   if (reuse == MAT_INITIAL_MATRIX) {
410     ai = aa->i;
411     aj = aa->j;
412     nz = bs2 * aa->nz;
413     PetscCall(PetscMalloc2(nz, &row, nz, &col));
414     for (i = 0; i < M; i++) {
415       ajj = aj + ai[i];
416       rnz = ai[i + 1] - ai[i];
417       for (k = 0; k < rnz; k++) {
418         for (j = 0; j < bs; j++) {
419           for (m = 0; m < bs; m++) {
420             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
421             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
422             idx++;
423           }
424         }
425       }
426     }
427     mumps->irn = row;
428     mumps->jcn = col;
429     mumps->nnz = nz;
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
435   const PetscInt *ai, *aj, *ajj;
436   PetscInt        bs;
437   PetscInt64      nz, rnz, i, j, k, m;
438   PetscMUMPSInt  *row, *col;
439   PetscScalar    *val;
440   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
441   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
442 #if defined(PETSC_USE_COMPLEX)
443   PetscBool isset, hermitian;
444 #endif
445 
446   PetscFunctionBegin;
447 #if defined(PETSC_USE_COMPLEX)
448   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
449   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
450 #endif
451   ai = aa->i;
452   aj = aa->j;
453   PetscCall(MatGetBlockSize(A, &bs));
454   if (reuse == MAT_INITIAL_MATRIX) {
455     nz = aa->nz;
456     PetscCall(PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col));
457     if (bs > 1) {
458       PetscCall(PetscMalloc1(bs2 * nz, &mumps->val_alloc));
459       mumps->val = mumps->val_alloc;
460     } else {
461       mumps->val = aa->a;
462     }
463     mumps->irn = row;
464     mumps->jcn = col;
465   } else {
466     if (bs == 1) mumps->val = aa->a;
467     row = mumps->irn;
468     col = mumps->jcn;
469   }
470   val = mumps->val;
471 
472   nz = 0;
473   if (bs > 1) {
474     for (i = 0; i < mbs; i++) {
475       rnz = ai[i + 1] - ai[i];
476       ajj = aj + ai[i];
477       for (j = 0; j < rnz; j++) {
478         for (k = 0; k < bs; k++) {
479           for (m = 0; m < bs; m++) {
480             if (ajj[j] > i || k >= m) {
481               if (reuse == MAT_INITIAL_MATRIX) {
482                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
483                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
484               }
485               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
486             }
487           }
488         }
489       }
490     }
491   } else if (reuse == MAT_INITIAL_MATRIX) {
492     for (i = 0; i < mbs; i++) {
493       rnz = ai[i + 1] - ai[i];
494       ajj = aj + ai[i];
495       for (j = 0; j < rnz; j++) {
496         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
497         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
498         nz++;
499       }
500     }
501     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
502   }
503   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
504   PetscFunctionReturn(0);
505 }
506 
507 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
508   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
509   PetscInt64         nz, rnz, i, j;
510   const PetscScalar *av, *v1;
511   PetscScalar       *val;
512   PetscMUMPSInt     *row, *col;
513   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
514   PetscBool          missing;
515 #if defined(PETSC_USE_COMPLEX)
516   PetscBool hermitian, isset;
517 #endif
518 
519   PetscFunctionBegin;
520 #if defined(PETSC_USE_COMPLEX)
521   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
522   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
523 #endif
524   PetscCall(MatSeqAIJGetArrayRead(A, &av));
525   ai    = aa->i;
526   aj    = aa->j;
527   adiag = aa->diag;
528   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
529   if (reuse == MAT_INITIAL_MATRIX) {
530     /* count nz in the upper triangular part of A */
531     nz = 0;
532     if (missing) {
533       for (i = 0; i < M; i++) {
534         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
535           for (j = ai[i]; j < ai[i + 1]; j++) {
536             if (aj[j] < i) continue;
537             nz++;
538           }
539         } else {
540           nz += ai[i + 1] - adiag[i];
541         }
542       }
543     } else {
544       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
545     }
546     PetscCall(PetscMalloc2(nz, &row, nz, &col));
547     PetscCall(PetscMalloc1(nz, &val));
548     mumps->nnz = nz;
549     mumps->irn = row;
550     mumps->jcn = col;
551     mumps->val = mumps->val_alloc = val;
552 
553     nz = 0;
554     if (missing) {
555       for (i = 0; i < M; i++) {
556         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
557           for (j = ai[i]; j < ai[i + 1]; j++) {
558             if (aj[j] < i) continue;
559             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
560             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
561             val[nz] = av[j];
562             nz++;
563           }
564         } else {
565           rnz = ai[i + 1] - adiag[i];
566           ajj = aj + adiag[i];
567           v1  = av + adiag[i];
568           for (j = 0; j < rnz; j++) {
569             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
570             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
571             val[nz++] = v1[j];
572           }
573         }
574       }
575     } else {
576       for (i = 0; i < M; i++) {
577         rnz = ai[i + 1] - adiag[i];
578         ajj = aj + adiag[i];
579         v1  = av + adiag[i];
580         for (j = 0; j < rnz; j++) {
581           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
582           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
583           val[nz++] = v1[j];
584         }
585       }
586     }
587   } else {
588     nz  = 0;
589     val = mumps->val;
590     if (missing) {
591       for (i = 0; i < M; i++) {
592         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
593           for (j = ai[i]; j < ai[i + 1]; j++) {
594             if (aj[j] < i) continue;
595             val[nz++] = av[j];
596           }
597         } else {
598           rnz = ai[i + 1] - adiag[i];
599           v1  = av + adiag[i];
600           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
601         }
602       }
603     } else {
604       for (i = 0; i < M; i++) {
605         rnz = ai[i + 1] - adiag[i];
606         v1  = av + adiag[i];
607         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
608       }
609     }
610   }
611   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
612   PetscFunctionReturn(0);
613 }
614 
615 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
616   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
617   PetscInt           bs;
618   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
619   PetscMUMPSInt     *row, *col;
620   const PetscScalar *av, *bv, *v1, *v2;
621   PetscScalar       *val;
622   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
623   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
624   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
625   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
626 #if defined(PETSC_USE_COMPLEX)
627   PetscBool hermitian, isset;
628 #endif
629 
630   PetscFunctionBegin;
631 #if defined(PETSC_USE_COMPLEX)
632   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
633   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
634 #endif
635   PetscCall(MatGetBlockSize(A, &bs));
636   rstart = A->rmap->rstart;
637   ai     = aa->i;
638   aj     = aa->j;
639   bi     = bb->i;
640   bj     = bb->j;
641   av     = aa->a;
642   bv     = bb->a;
643 
644   garray = mat->garray;
645 
646   if (reuse == MAT_INITIAL_MATRIX) {
647     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
648     PetscCall(PetscMalloc2(nz, &row, nz, &col));
649     PetscCall(PetscMalloc1(nz, &val));
650     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format (for bs>1 only the upper triangle of each diagonal block is kept) */
651     mumps->irn = row;
652     mumps->jcn = col;
653     mumps->val = mumps->val_alloc = val;
654   } else {
655     val = mumps->val;
656   }
657 
658   jj   = 0;
659   irow = rstart;
660   for (i = 0; i < mbs; i++) {
661     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
662     countA = ai[i + 1] - ai[i];
663     countB = bi[i + 1] - bi[i];
664     bjj    = bj + bi[i];
665     v1     = av + ai[i] * bs2;
666     v2     = bv + bi[i] * bs2;
667 
668     if (bs > 1) {
669       /* A-part */
670       for (j = 0; j < countA; j++) {
671         for (k = 0; k < bs; k++) {
672           for (m = 0; m < bs; m++) {
673             if (rstart + ajj[j] * bs > irow || k >= m) {
674               if (reuse == MAT_INITIAL_MATRIX) {
675                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
676                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
677               }
678               val[jj++] = v1[j * bs2 + m + k * bs];
679             }
680           }
681         }
682       }
683 
684       /* B-part */
685       for (j = 0; j < countB; j++) {
686         for (k = 0; k < bs; k++) {
687           for (m = 0; m < bs; m++) {
688             if (reuse == MAT_INITIAL_MATRIX) {
689               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
690               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
691             }
692             val[jj++] = v2[j * bs2 + m + k * bs];
693           }
694         }
695       }
696     } else {
697       /* A-part */
698       for (j = 0; j < countA; j++) {
699         if (reuse == MAT_INITIAL_MATRIX) {
700           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
701           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
702         }
703         val[jj++] = v1[j];
704       }
705 
706       /* B-part */
707       for (j = 0; j < countB; j++) {
708         if (reuse == MAT_INITIAL_MATRIX) {
709           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
710           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
711         }
712         val[jj++] = v2[j];
713       }
714     }
715     irow += bs;
716   }
717   mumps->nnz = jj;
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
722   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
723   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
724   PetscMUMPSInt     *row, *col;
725   const PetscScalar *av, *bv, *v1, *v2;
726   PetscScalar       *val;
727   Mat                Ad, Ao;
728   Mat_SeqAIJ        *aa;
729   Mat_SeqAIJ        *bb;
730 
731   PetscFunctionBegin;
732   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
733   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
734   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
735 
736   aa = (Mat_SeqAIJ *)(Ad)->data;
737   bb = (Mat_SeqAIJ *)(Ao)->data;
738   ai = aa->i;
739   aj = aa->j;
740   bi = bb->i;
741   bj = bb->j;
742 
743   rstart = A->rmap->rstart;
744 
745   if (reuse == MAT_INITIAL_MATRIX) {
746     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
747     PetscCall(PetscMalloc2(nz, &row, nz, &col));
748     PetscCall(PetscMalloc1(nz, &val));
749     mumps->nnz = nz;
750     mumps->irn = row;
751     mumps->jcn = col;
752     mumps->val = mumps->val_alloc = val;
753   } else {
754     val = mumps->val;
755   }
756 
757   jj   = 0;
758   irow = rstart;
759   for (i = 0; i < m; i++) {
760     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
761     countA = ai[i + 1] - ai[i];
762     countB = bi[i + 1] - bi[i];
763     bjj    = bj + bi[i];
764     v1     = av + ai[i];
765     v2     = bv + bi[i];
766 
767     /* A-part */
768     for (j = 0; j < countA; j++) {
769       if (reuse == MAT_INITIAL_MATRIX) {
770         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
771         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
772       }
773       val[jj++] = v1[j];
774     }
775 
776     /* B-part */
777     for (j = 0; j < countB; j++) {
778       if (reuse == MAT_INITIAL_MATRIX) {
779         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
780         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
781       }
782       val[jj++] = v2[j];
783     }
784     irow++;
785   }
786   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
787   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
788   PetscFunctionReturn(0);
789 }
790 
791 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
792   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
793   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
794   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
795   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
796   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
797   const PetscInt     bs2 = mat->bs2;
798   PetscInt           bs;
799   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
800   PetscMUMPSInt     *row, *col;
801   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
802   PetscScalar       *val;
803 
804   PetscFunctionBegin;
805   PetscCall(MatGetBlockSize(A, &bs));
806   if (reuse == MAT_INITIAL_MATRIX) {
807     nz = bs2 * (aa->nz + bb->nz);
808     PetscCall(PetscMalloc2(nz, &row, nz, &col));
809     PetscCall(PetscMalloc1(nz, &val));
810     mumps->nnz = nz;
811     mumps->irn = row;
812     mumps->jcn = col;
813     mumps->val = mumps->val_alloc = val;
814   } else {
815     val = mumps->val;
816   }
817 
818   jj   = 0;
819   irow = rstart;
820   for (i = 0; i < mbs; i++) {
821     countA = ai[i + 1] - ai[i];
822     countB = bi[i + 1] - bi[i];
823     ajj    = aj + ai[i];
824     bjj    = bj + bi[i];
825     v1     = av + bs2 * ai[i];
826     v2     = bv + bs2 * bi[i];
827 
828     idx = 0;
829     /* A-part */
830     for (k = 0; k < countA; k++) {
831       for (j = 0; j < bs; j++) {
832         for (n = 0; n < bs; n++) {
833           if (reuse == MAT_INITIAL_MATRIX) {
834             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
835             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
836           }
837           val[jj++] = v1[idx++];
838         }
839       }
840     }
841 
842     idx = 0;
843     /* B-part */
844     for (k = 0; k < countB; k++) {
845       for (j = 0; j < bs; j++) {
846         for (n = 0; n < bs; n++) {
847           if (reuse == MAT_INITIAL_MATRIX) {
848             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
849             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
850           }
851           val[jj++] = v2[idx++];
852         }
853       }
854     }
855     irow += bs;
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
861   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
862   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
863   PetscMUMPSInt     *row, *col;
864   const PetscScalar *av, *bv, *v1, *v2;
865   PetscScalar       *val;
866   Mat                Ad, Ao;
867   Mat_SeqAIJ        *aa;
868   Mat_SeqAIJ        *bb;
869 #if defined(PETSC_USE_COMPLEX)
870   PetscBool hermitian, isset;
871 #endif
872 
873   PetscFunctionBegin;
874 #if defined(PETSC_USE_COMPLEX)
875   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
876   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
877 #endif
878   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
879   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
880   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
881 
882   aa    = (Mat_SeqAIJ *)(Ad)->data;
883   bb    = (Mat_SeqAIJ *)(Ao)->data;
884   ai    = aa->i;
885   aj    = aa->j;
886   adiag = aa->diag;
887   bi    = bb->i;
888   bj    = bb->j;
889 
890   rstart = A->rmap->rstart;
891 
892   if (reuse == MAT_INITIAL_MATRIX) {
893     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
894     nzb = 0; /* num of upper triangular entries in mat->B */
895     for (i = 0; i < m; i++) {
896       nza += (ai[i + 1] - adiag[i]);
897       countB = bi[i + 1] - bi[i];
898       bjj    = bj + bi[i];
899       for (j = 0; j < countB; j++) {
900         if (garray[bjj[j]] > rstart) nzb++;
901       }
902     }
903 
904     nz = nza + nzb; /* total nz of upper triangular part of mat */
905     PetscCall(PetscMalloc2(nz, &row, nz, &col));
906     PetscCall(PetscMalloc1(nz, &val));
907     mumps->nnz = nz;
908     mumps->irn = row;
909     mumps->jcn = col;
910     mumps->val = mumps->val_alloc = val;
911   } else {
912     val = mumps->val;
913   }
914 
915   jj   = 0;
916   irow = rstart;
917   for (i = 0; i < m; i++) {
918     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
919     v1     = av + adiag[i];
920     countA = ai[i + 1] - adiag[i];
921     countB = bi[i + 1] - bi[i];
922     bjj    = bj + bi[i];
923     v2     = bv + bi[i];
924 
925     /* A-part */
926     for (j = 0; j < countA; j++) {
927       if (reuse == MAT_INITIAL_MATRIX) {
928         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
929         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
930       }
931       val[jj++] = v1[j];
932     }
933 
934     /* B-part */
935     for (j = 0; j < countB; j++) {
936       if (garray[bjj[j]] > rstart) {
937         if (reuse == MAT_INITIAL_MATRIX) {
938           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
939           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
940         }
941         val[jj++] = v2[j];
942       }
943     }
944     irow++;
945   }
946   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
947   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatDestroy_MUMPS(Mat A) {
952   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
953 
954   PetscFunctionBegin;
955   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
956   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
957   PetscCall(VecScatterDestroy(&mumps->scat_sol));
958   PetscCall(VecDestroy(&mumps->b_seq));
959   PetscCall(VecDestroy(&mumps->x_seq));
960   PetscCall(PetscFree(mumps->id.perm_in));
961   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
962   PetscCall(PetscFree(mumps->val_alloc));
963   PetscCall(PetscFree(mumps->info));
964   PetscCall(PetscFree(mumps->ICNTL_pre));
965   PetscCall(PetscFree(mumps->CNTL_pre));
966   PetscCall(MatMumpsResetSchur_Private(mumps));
967   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
968     mumps->id.job = JOB_END;
969     PetscMUMPS_c(mumps);
970     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
971     if (mumps->mumps_comm != MPI_COMM_NULL) {
972       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
973       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
974     }
975   }
976 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
977   if (mumps->use_petsc_omp_support) {
978     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
979     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
980     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
981   }
982 #endif
983   PetscCall(PetscFree(mumps->ia_alloc));
984   PetscCall(PetscFree(mumps->ja_alloc));
985   PetscCall(PetscFree(mumps->recvcount));
986   PetscCall(PetscFree(mumps->reqs));
987   PetscCall(PetscFree(mumps->irhs_loc));
988   PetscCall(PetscFree(A->data));
989 
990   /* clear composed functions */
991   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
992   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
993   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
994   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
995   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
996   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
997   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
998   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
999   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1008 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) {
1009   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1010   const PetscMPIInt ompsize = mumps->omp_comm_size;
1011   PetscInt          i, m, M, rstart;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(MatGetSize(A, &M, NULL));
1015   PetscCall(MatGetLocalSize(A, &m, NULL));
1016   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1017   if (ompsize == 1) {
1018     if (!mumps->irhs_loc) {
1019       mumps->nloc_rhs = m;
1020       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1021       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1022       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1023     }
1024     mumps->id.rhs_loc = (MumpsScalar *)array;
1025   } else {
1026 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1027     const PetscInt *ranges;
1028     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1029     MPI_Group       petsc_group, omp_group;
1030     PetscScalar    *recvbuf = NULL;
1031 
1032     if (mumps->is_omp_master) {
1033       /* Lazily initialize the omp stuff for distributed rhs */
1034       if (!mumps->irhs_loc) {
1035         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1036         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1037         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1038         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1039         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1040         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1041 
1042         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1043         mumps->nloc_rhs = 0;
1044         PetscCall(MatGetOwnershipRanges(A, &ranges));
1045         for (j = 0; j < ompsize; j++) {
1046           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1047           mumps->nloc_rhs += mumps->rhs_nrow[j];
1048         }
1049         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1050         for (j = k = 0; j < ompsize; j++) {
1051           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1052         }
1053 
1054         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1055         PetscCallMPI(MPI_Group_free(&petsc_group));
1056         PetscCallMPI(MPI_Group_free(&omp_group));
1057       }
1058 
1059       /* Realloc buffers when current nrhs is bigger than what we have met */
1060       if (nrhs > mumps->max_nrhs) {
1061         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1062         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1063         mumps->max_nrhs = nrhs;
1064       }
1065 
1066       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1067       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1068       mumps->rhs_disps[0] = 0;
1069       for (j = 1; j < ompsize; j++) {
1070         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1071         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1072       }
1073       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1074     }
1075 
1076     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1077     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1078 
1079     if (mumps->is_omp_master) {
1080       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1081         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1082         for (j = 0; j < ompsize; j++) {
1083           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1084           dst                    = dstbase;
1085           for (i = 0; i < nrhs; i++) {
1086             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1087             src += mumps->rhs_nrow[j];
1088             dst += mumps->nloc_rhs;
1089           }
1090           dstbase += mumps->rhs_nrow[j];
1091         }
1092       }
1093       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1094     }
1095 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1096   }
1097   mumps->id.nrhs     = nrhs;
1098   mumps->id.nloc_rhs = mumps->nloc_rhs;
1099   mumps->id.lrhs_loc = mumps->nloc_rhs;
1100   mumps->id.irhs_loc = mumps->irhs_loc;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) {
1105   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1106   const PetscScalar *rarray = NULL;
1107   PetscScalar       *array;
1108   IS                 is_iden, is_petsc;
1109   PetscInt           i;
1110   PetscBool          second_solve = PETSC_FALSE;
1111   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1115                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1116                                    &cite1));
1117   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1118                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1119                                    &cite2));
1120 
1121   if (A->factorerrortype) {
1122     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1123     PetscCall(VecSetInf(x));
1124     PetscFunctionReturn(0);
1125   }
1126 
1127   mumps->id.nrhs = 1;
1128   if (mumps->petsc_size > 1) {
1129     if (mumps->ICNTL20 == 10) {
1130       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1131       PetscCall(VecGetArrayRead(b, &rarray));
1132       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1133     } else {
1134       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1135       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1136       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1137       if (!mumps->myid) {
1138         PetscCall(VecGetArray(mumps->b_seq, &array));
1139         mumps->id.rhs = (MumpsScalar *)array;
1140       }
1141     }
1142   } else {                   /* petsc_size == 1 */
1143     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1144     PetscCall(VecCopy(b, x));
1145     PetscCall(VecGetArray(x, &array));
1146     mumps->id.rhs = (MumpsScalar *)array;
1147   }
1148 
1149   /*
1150      handle condensation step of Schur complement (if any)
1151      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1152      According to the MUMPS (5.0.0) manual, any value should be harmful during the factorization phase.
1153      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1154      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1155   */
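  /* In short: if the user supplied Schur indices but did not set a valid ICNTL(26), we first run a
     condensed solve (ICNTL(26)=1), solve the Schur system ourselves in MatMumpsSolveSchur_Private(),
     and then expand back to the full solution (ICNTL(26)=2); see MatMumpsHandleSchur_Private(). */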
1156   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1157     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1158     second_solve = PETSC_TRUE;
1159     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1160   }
1161   /* solve phase */
1162   /*-------------*/
1163   mumps->id.job = JOB_SOLVE;
1164   PetscMUMPS_c(mumps);
1165   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1166 
1167   /* handle expansion step of Schur complement (if any) */
1168   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1169 
1170   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1171     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1172       /* when id.ICNTL(9) changes, the contents of isol_loc may change (but not its size, lsol_loc), so recreate scat_sol */
1173       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1174     }
1175     if (!mumps->scat_sol) { /* create scatter scat_sol */
1176       PetscInt *isol2_loc = NULL;
1177       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1178       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1179       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1180       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1181       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1182       PetscCall(ISDestroy(&is_iden));
1183       PetscCall(ISDestroy(&is_petsc));
1184       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1185     }
1186 
1187     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1188     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1189   }
1190 
1191   if (mumps->petsc_size > 1) {
1192     if (mumps->ICNTL20 == 10) {
1193       PetscCall(VecRestoreArrayRead(b, &rarray));
1194     } else if (!mumps->myid) {
1195       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1196     }
1197   } else PetscCall(VecRestoreArray(x, &array));
1198 
1199   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) {
1204   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1205 
1206   PetscFunctionBegin;
1207   mumps->id.ICNTL(9) = 0;
1208   PetscCall(MatSolve_MUMPS(A, b, x));
1209   mumps->id.ICNTL(9) = 1;
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) {
1214   Mat                Bt = NULL;
1215   PetscBool          denseX, denseB, flg, flgT;
1216   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1217   PetscInt           i, nrhs, M;
1218   PetscScalar       *array;
1219   const PetscScalar *rbray;
1220   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1221   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1222   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1223   IS                 is_to, is_from;
1224   PetscInt           k, proc, j, m, myrstart;
1225   const PetscInt    *rstart;
1226   Vec                v_mpi, msol_loc;
1227   VecScatter         scat_sol;
1228   Vec                b_seq;
1229   VecScatter         scat_rhs;
1230   PetscScalar       *aa;
1231   PetscInt           spnr, *ia, *ja;
1232   Mat_MPIAIJ        *b = NULL;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1236   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1237 
1238   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1239   if (denseB) {
1240     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1241     mumps->id.ICNTL(20) = 0; /* dense RHS */
1242   } else {                   /* sparse B */
1243     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1244     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1245     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1246                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1247       PetscCall(MatTransposeGetMat(B, &Bt));
1248     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1249     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1250   }
1251 
1252   PetscCall(MatGetSize(B, &M, &nrhs));
1253   mumps->id.nrhs = nrhs;
1254   mumps->id.lrhs = M;
1255   mumps->id.rhs  = NULL;
1256 
1257   if (mumps->petsc_size == 1) {
1258     PetscScalar *aa;
1259     PetscInt     spnr, *ia, *ja;
1260     PetscBool    second_solve = PETSC_FALSE;
1261 
1262     PetscCall(MatDenseGetArray(X, &array));
1263     mumps->id.rhs = (MumpsScalar *)array;
1264 
1265     if (denseB) {
1266       /* copy B to X */
1267       PetscCall(MatDenseGetArrayRead(B, &rbray));
1268       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1269       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1270     } else { /* sparse B */
1271       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1272       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1273       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1274       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1275       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1276     }
1277     /* handle condensation step of Schur complement (if any) */
1278     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1279       second_solve = PETSC_TRUE;
1280       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1281     }
1282     /* solve phase */
1283     /*-------------*/
1284     mumps->id.job = JOB_SOLVE;
1285     PetscMUMPS_c(mumps);
1286     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1287 
1288     /* handle expansion step of Schur complement (if any) */
1289     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1290     if (!denseB) { /* sparse B */
1291       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1292       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1293       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1294     }
1295     PetscCall(MatDenseRestoreArray(X, &array));
1296     PetscFunctionReturn(0);
1297   }
1298 
1299   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1300   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1301 
1302   /* create msol_loc to hold mumps local solution */
1303   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1304   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1305 
1306   lsol_loc  = mumps->id.lsol_loc;
1307   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1308   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1309   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1310   mumps->id.isol_loc = isol_loc;
1311 
1312   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1313 
1314   if (denseB) {
1315     if (mumps->ICNTL20 == 10) {
1316       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1317       PetscCall(MatDenseGetArrayRead(B, &rbray));
1318       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1319       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1320       PetscCall(MatGetLocalSize(B, &m, NULL));
1321       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1322     } else {
1323       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1324       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs cannot simply be an MPI_Gather, resulting in
1325         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then, on rank
1326         0, re-arrange B into the desired order, which is a local operation.
1327       */
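      /* A minimal sketch of that optimization (illustration only, reusing the surrounding variable names; not used here):

           Vec        ball;
           VecScatter tozero;
           PetscCall(VecScatterCreateToZero(v_mpi, &tozero, &ball));
           PetscCall(VecScatterBegin(tozero, v_mpi, ball, INSERT_VALUES, SCATTER_FORWARD));
           PetscCall(VecScatterEnd(tozero, v_mpi, ball, INSERT_VALUES, SCATTER_FORWARD));
           if (!mumps->myid) { ... re-arrange the gathered entries of ball into the column-major order MUMPS expects ... }
           PetscCall(VecScatterDestroy(&tozero));
           PetscCall(VecDestroy(&ball));
      */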
1328 
1329       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1330       /* wrap dense rhs matrix B into a vector v_mpi */
1331       PetscCall(MatGetLocalSize(B, &m, NULL));
1332       PetscCall(MatDenseGetArray(B, &bray));
1333       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1334       PetscCall(MatDenseRestoreArray(B, &bray));
1335 
1336       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1337       if (!mumps->myid) {
1338         PetscInt *idx;
1339         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1340         PetscCall(PetscMalloc1(nrhs * M, &idx));
1341         PetscCall(MatGetOwnershipRanges(B, &rstart));
1342         k = 0;
1343         for (proc = 0; proc < mumps->petsc_size; proc++) {
1344           for (j = 0; j < nrhs; j++) {
1345             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1346           }
1347         }
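        /* For example (illustration only): with 2 ranks owning rows {0,1} and {2} of B (M=3) and nrhs=2, v_mpi concatenates
           each rank's local column-major block, and the loop above produces idx[] = {0, 1, 3, 4, 2, 5}, i.e. the k-th entry
           of v_mpi is placed at column-major position idx[k] of the centralized b_seq */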
1348 
1349         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1350         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1351         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1352       } else {
1353         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1354         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1355         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1356       }
1357       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1358       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1359       PetscCall(ISDestroy(&is_to));
1360       PetscCall(ISDestroy(&is_from));
1361       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1362 
1363       if (!mumps->myid) { /* define rhs on the host */
1364         PetscCall(VecGetArray(b_seq, &bray));
1365         mumps->id.rhs = (MumpsScalar *)bray;
1366         PetscCall(VecRestoreArray(b_seq, &bray));
1367       }
1368     }
1369   } else { /* sparse B */
1370     b = (Mat_MPIAIJ *)Bt->data;
1371 
1372     /* wrap dense X into a vector v_mpi */
1373     PetscCall(MatGetLocalSize(X, &m, NULL));
1374     PetscCall(MatDenseGetArray(X, &bray));
1375     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1376     PetscCall(MatDenseRestoreArray(X, &bray));
1377 
1378     if (!mumps->myid) {
1379       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1380       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1381       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1382       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1383       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1384     } else {
1385       mumps->id.irhs_ptr    = NULL;
1386       mumps->id.irhs_sparse = NULL;
1387       mumps->id.nz_rhs      = 0;
1388       mumps->id.rhs_sparse  = NULL;
1389     }
1390   }
1391 
1392   /* solve phase */
1393   /*-------------*/
1394   mumps->id.job = JOB_SOLVE;
1395   PetscMUMPS_c(mumps);
1396   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1397 
1398   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1399   PetscCall(MatDenseGetArray(X, &array));
1400   PetscCall(VecPlaceArray(v_mpi, array));
1401 
1402   /* create scatter scat_sol */
1403   PetscCall(MatGetOwnershipRanges(X, &rstart));
1404   /* idxx[]: indices used to scatter the mumps solution (in msol_loc order) into v_mpi, which aliases X */
1405 
1406   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1407   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1408   for (i = 0; i < lsol_loc; i++) {
1409     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1410 
1411     for (proc = 0; proc < mumps->petsc_size; proc++) {
1412       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1413         myrstart = rstart[proc];
1414         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1415         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1416         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1417         break;
1418       }
1419     }
1420 
1421     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1422   }
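  /* For example (illustration only): with 2 ranks owning rows {0,1} and {2} of X and nrhs=2, an entry MUMPS returns for
     global row 2 belongs to rank 1 (rstart=2, m=1), so iidx = 0 + 2*nrhs = 4 and its nrhs components are scattered to
     positions 4 and 5 of v_mpi, which are X(2,0) and X(2,1) in rank 1's local column-major block */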
1423   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1424   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1425   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1426   PetscCall(ISDestroy(&is_from));
1427   PetscCall(ISDestroy(&is_to));
1428   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1429   PetscCall(MatDenseRestoreArray(X, &array));
1430 
1431   /* free spaces */
1432   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1433   mumps->id.isol_loc = isol_loc_save;
1434 
1435   PetscCall(PetscFree2(sol_loc, isol_loc));
1436   PetscCall(PetscFree(idxx));
1437   PetscCall(VecDestroy(&msol_loc));
1438   PetscCall(VecDestroy(&v_mpi));
1439   if (!denseB) {
1440     if (!mumps->myid) {
1441       b = (Mat_MPIAIJ *)Bt->data;
1442       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1443       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1444       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1445     }
1446   } else {
1447     if (mumps->ICNTL20 == 0) {
1448       PetscCall(VecDestroy(&b_seq));
1449       PetscCall(VecScatterDestroy(&scat_rhs));
1450     }
1451   }
1452   PetscCall(VecScatterDestroy(&scat_sol));
1453   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) {
1458   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1459   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1460 
1461   PetscFunctionBegin;
1462   mumps->id.ICNTL(9) = 0;
1463   PetscCall(MatMatSolve_MUMPS(A, B, X));
1464   mumps->id.ICNTL(9) = oldvalue;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) {
1469   PetscBool flg;
1470   Mat       B;
1471 
1472   PetscFunctionBegin;
1473   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1474   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1475 
1476   /* Create B=Bt^T that uses Bt's data structure */
1477   PetscCall(MatCreateTranspose(Bt, &B));
1478 
1479   PetscCall(MatMatSolve_MUMPS(A, B, X));
1480   PetscCall(MatDestroy(&B));
1481   PetscFunctionReturn(0);
1482 }
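/*
   Illustrative sketch (not compiled here) of how a user would exercise the routine above. F is assumed to be a MUMPS
   factor of A, Bt an AIJ matrix whose rows are the right-hand sides, and X a dense matrix with the same row layout as A:

     Mat F, Bt, X;
     ...
     PetscCall(MatMatTransposeSolve(F, Bt, X));   // solves A * X = Bt^T, exploiting the sparsity of Bt

   Storing the right-hand sides as rows of Bt lets MUMPS use its sparse right-hand side support (irhs_sparse).
*/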
1483 
1484 #if !defined(PETSC_USE_COMPLEX)
1485 /*
1486   input:
1487    F:        numeric factor
1488   output:
1489    nneg:     total number of negative pivots
1490    nzero:    total number of zero pivots
1491    npos:     (global dimension of F) - nneg - nzero
1492 */
1493 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) {
1494   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1495   PetscMPIInt size;
1496 
1497   PetscFunctionBegin;
1498   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1499   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), and ScaLAPACK does not provide a way to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1500   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1501 
1502   if (nneg) *nneg = mumps->id.INFOG(12);
1503   if (nzero || npos) {
1504     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1505     if (nzero) *nzero = mumps->id.INFOG(28);
1506     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 #endif
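/*
   Illustrative sketch (not compiled here): obtaining the inertia from a MUMPS Cholesky factor, e.g. inside an eigenvalue
   slicing loop. F is assumed to come from PCFactorGetMatrix() after a Cholesky factorization run with
   -mat_mumps_icntl_13 1 (skip ScaLAPACK on the root) and -mat_mumps_icntl_24 1 (null-pivot detection):

     Mat      F;
     PetscInt nneg, nzero, npos;
     ...
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));   // dispatches to MatGetInertia_SBAIJMUMPS() above
*/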
1511 
1512 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) {
1513   PetscInt       i, nreqs;
1514   PetscMUMPSInt *irn, *jcn;
1515   PetscMPIInt    count;
1516   PetscInt64     totnnz, remain;
1517   const PetscInt osize = mumps->omp_comm_size;
1518   PetscScalar   *val;
1519 
1520   PetscFunctionBegin;
1521   if (osize > 1) {
1522     if (reuse == MAT_INITIAL_MATRIX) {
1523       /* master first gathers counts of nonzeros to receive */
1524       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1525       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1526 
1527       /* Then each computes number of send/recvs */
1528       if (mumps->is_omp_master) {
1529         /* Start from 1 since self communication is not done in MPI */
1530         nreqs = 0;
1531         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1532       } else {
1533         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1534       }
1535       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1536 
1537       /* The following code does a simple thing: the omp_master rank gathers irn/jcn/val from the others.
1538          MPI_Gatherv would suffice if it supported counts > 2^31-1. Since it does not, and mumps->nnz
1539          might be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1540          is very small, the current approach should add no overhead compared to MPI_Gatherv.
1541        */
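      /* For instance (illustration only), a sender holding 5,000,000,000 triples ships them in three slices of
         2,147,483,647, 2,147,483,647 and 705,032,706 entries; each slice posts separate irn/jcn/val messages,
         i.e. 9 requests in total */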
1542       nreqs = 0; /* counter for actual send/recvs */
1543       if (mumps->is_omp_master) {
1544         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1545         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1546         PetscCall(PetscMalloc1(totnnz, &val));
1547 
1548         /* Self communication */
1549         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1550         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1551         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1552 
1553         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1554         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1555         PetscCall(PetscFree(mumps->val_alloc));
1556         mumps->nnz = totnnz;
1557         mumps->irn = irn;
1558         mumps->jcn = jcn;
1559         mumps->val = mumps->val_alloc = val;
1560 
1561         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1562         jcn += mumps->recvcount[0];
1563         val += mumps->recvcount[0];
1564 
1565         /* Remote communication */
1566         for (i = 1; i < osize; i++) {
1567           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1568           remain = mumps->recvcount[i] - count;
1569           while (count > 0) {
1570             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1571             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1572             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1573             irn += count;
1574             jcn += count;
1575             val += count;
1576             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1577             remain -= count;
1578           }
1579         }
1580       } else {
1581         irn    = mumps->irn;
1582         jcn    = mumps->jcn;
1583         val    = mumps->val;
1584         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1585         remain = mumps->nnz - count;
1586         while (count > 0) {
1587           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1588           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1589           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1590           irn += count;
1591           jcn += count;
1592           val += count;
1593           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1594           remain -= count;
1595         }
1596       }
1597     } else {
1598       nreqs = 0;
1599       if (mumps->is_omp_master) {
1600         val = mumps->val + mumps->recvcount[0];
1601         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1602           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1603           remain = mumps->recvcount[i] - count;
1604           while (count > 0) {
1605             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1606             val += count;
1607             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1608             remain -= count;
1609           }
1610         }
1611       } else {
1612         val    = mumps->val;
1613         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1614         remain = mumps->nnz - count;
1615         while (count > 0) {
1616           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1617           val += count;
1618           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1619           remain -= count;
1620         }
1621       }
1622     }
1623     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1624     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) {
1630   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1631   PetscBool  isMPIAIJ;
1632 
1633   PetscFunctionBegin;
1634   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip on analysis error, except for the empty-matrix case (-16 with N=0) */
1635     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1636     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1637     PetscFunctionReturn(0);
1638   }
1639 
1640   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1641   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1642 
1643   /* numerical factorization phase */
1644   /*-------------------------------*/
1645   mumps->id.job = JOB_FACTNUMERIC;
1646   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1647     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1648   } else {
1649     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1650   }
1651   PetscMUMPS_c(mumps);
1652   if (mumps->id.INFOG(1) < 0) {
1653     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1654     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1655       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1656       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1657     } else if (mumps->id.INFOG(1) == -13) {
1658       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1659       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1660     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1661       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1662       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1663     } else {
1664       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665       F->factorerrortype = MAT_FACTOR_OTHER;
1666     }
1667   }
1668   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16)=%d", mumps->id.ICNTL(16));
1669 
1670   F->assembled = PETSC_TRUE;
1671 
1672   if (F->schur) { /* reset Schur status to unfactored */
1673 #if defined(PETSC_HAVE_CUDA)
1674     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1675 #endif
1676     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1677       mumps->id.ICNTL(19) = 2;
1678       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1679     }
1680     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1681   }
1682 
1683   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1684   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1685 
1686   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1687   if (mumps->petsc_size > 1) {
1688     PetscInt     lsol_loc;
1689     PetscScalar *sol_loc;
1690 
1691     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1692 
1693     /* distributed solution; Create x_seq=sol_loc for repeated use */
1694     if (mumps->x_seq) {
1695       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1696       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1697       PetscCall(VecDestroy(&mumps->x_seq));
1698     }
1699     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1700     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1701     mumps->id.lsol_loc = lsol_loc;
1702     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1703     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1704   }
1705   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1706   PetscFunctionReturn(0);
1707 }
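/*
   Illustrative sketch (not compiled here): how a caller can inspect the outcome recorded above instead of aborting on
   failure. A and F are assumed to be the original matrix and its MUMPS factor (e.g. obtained from PCFactorGetMatrix()):

     MatFactorError err;
     PetscInt       infog1;

     PetscCall(MatSetErrorIfFailure(A, PETSC_FALSE));   // record failures in F instead of erroring out
     ...                                                 // numeric factorization, e.g. via PCSetUp()
     PetscCall(MatFactorGetError(F, &err));              // MAT_FACTOR_NUMERIC_ZEROPIVOT, MAT_FACTOR_OUTMEMORY, ...
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));         // raw MUMPS INFOG(1) for the precise error code
*/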
1708 
1709 /* Sets MUMPS options from the options database */
1710 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) {
1711   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1712   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1713   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1714   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1715   MumpsScalar  *arr;
1716 
1717   PetscFunctionBegin;
1718   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1719   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1720     PetscInt nthreads   = 0;
1721     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1722     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1723 
1724     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1725     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1726     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1727 
1728     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1729     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1730     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1731     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1732     if (mumps->use_petsc_omp_support) {
1733       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1734                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1735       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1736 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1737       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1738       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1739 #endif
1740     } else {
1741       mumps->omp_comm      = PETSC_COMM_SELF;
1742       mumps->mumps_comm    = mumps->petsc_comm;
1743       mumps->is_omp_master = PETSC_TRUE;
1744     }
1745     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1746     mumps->reqs = NULL;
1747     mumps->tag  = 0;
1748 
1749     if (mumps->mumps_comm != MPI_COMM_NULL) {
1750       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1751         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1752         MPI_Comm comm;
1753         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1754         mumps->mumps_comm = comm;
1755       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1756     }
1757 
1758     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1759     mumps->id.job          = JOB_INIT;
1760     mumps->id.par          = 1; /* host participates in factorization and solve */
1761     mumps->id.sym          = mumps->sym;
1762 
1763     size          = mumps->id.size_schur;
1764     arr           = mumps->id.schur;
1765     listvar_schur = mumps->id.listvar_schur;
1766     PetscMUMPS_c(mumps);
1767     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1768     /* restore cached ICNTL and CNTL values */
1769     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1770     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1771     PetscCall(PetscFree(mumps->ICNTL_pre));
1772     PetscCall(PetscFree(mumps->CNTL_pre));
1773 
1774     if (schur) {
1775       mumps->id.size_schur    = size;
1776       mumps->id.schur_lld     = size;
1777       mumps->id.schur         = arr;
1778       mumps->id.listvar_schur = listvar_schur;
1779       if (mumps->petsc_size > 1) {
1780         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1781 
1782         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1783         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1784         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1785         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1786       } else {
1787         if (F->factortype == MAT_FACTOR_LU) {
1788           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1789         } else {
1790           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1791         }
1792       }
1793       mumps->id.ICNTL(26) = -1;
1794     }
1795 
1796     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1797        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1798      */
1799     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1800     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1801 
1802     mumps->scat_rhs = NULL;
1803     mumps->scat_sol = NULL;
1804 
1805     /* set PETSc-MUMPS default options - override MUMPS default */
1806     mumps->id.ICNTL(3) = 0;
1807     mumps->id.ICNTL(4) = 0;
1808     if (mumps->petsc_size == 1) {
1809       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1810       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1811     } else {
1812       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1813       mumps->id.ICNTL(21) = 1; /* distributed solution */
1814     }
1815   }
1816   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1817   if (flg) mumps->id.ICNTL(1) = icntl;
1818   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1819   if (flg) mumps->id.ICNTL(2) = icntl;
1820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1821   if (flg) mumps->id.ICNTL(3) = icntl;
1822 
1823   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1824   if (flg) mumps->id.ICNTL(4) = icntl;
1825   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1826 
1827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1828   if (flg) mumps->id.ICNTL(6) = icntl;
1829 
1830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1831   if (flg) {
1832     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1833     mumps->id.ICNTL(7) = icntl;
1834   }
1835 
1836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1837   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1838   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1840   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1841   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1842   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1843   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1844   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1845   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1846   if (flg) {
1847     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1848     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1849   }
1850   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1851   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1852     PetscCall(MatDestroy(&F->schur));
1853     PetscCall(MatMumpsResetSchur_Private(mumps));
1854   }
1855 
1856   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1857      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1858      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1859      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1860      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1861      In short, we could not use distributed RHS with MPICH until v4.0b1.
1862    */
1863 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1864   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1865 #else
1866   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1867 #endif
1868   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1869   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1870 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1871   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1872 #endif
1873   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1874 
1875   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1876   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1877   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1878   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1879 
1880   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1881   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1882   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1883   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1884   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1885   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1886   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1887   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1888   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1889   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1890   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1891   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1892 
1893   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1894   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1895   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1896   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1897   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1898   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1899 
1900   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1901 
1902   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1903   if (ninfo) {
1904     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1905     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1906     mumps->ninfo = ninfo;
1907     for (i = 0; i < ninfo; i++) {
1908       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1909       mumps->info[i] = info[i];
1910     }
1911   }
1912   PetscOptionsEnd();
1913   PetscFunctionReturn(0);
1914 }
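/*
   Illustrative sketch (not compiled here): the options read above are typically supplied on the command line together
   with a factorization preconditioner, e.g.

     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_cntl_1 0.01

   or set programmatically on the factor matrix before the numeric factorization; pc is assumed to be a PCLU or
   PCCHOLESKY preconditioner:

     Mat F;
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 50));    // ICNTL(14): percentage increase of the estimated workspace
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));    // CNTL(1): relative pivoting threshold
*/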
1915 
1916 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) {
1917   PetscFunctionBegin;
1918   if (mumps->id.INFOG(1) < 0) {
1919     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1920     if (mumps->id.INFOG(1) == -6) {
1921       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1922       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1923     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1924       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1925       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1926     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* -16 with N=0 means an empty matrix, not an error */
1927       PetscCall(PetscInfo(F, "Empty matrix\n"));
1928     } else {
1929       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1930       F->factorerrortype = MAT_FACTOR_OTHER;
1931     }
1932   }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
1937   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1938   Vec            b;
1939   const PetscInt M = A->rmap->N;
1940 
1941   PetscFunctionBegin;
1942   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1943     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1944     PetscFunctionReturn(0);
1945   }
1946 
1947   /* Set MUMPS options from the options database */
1948   PetscCall(MatSetFromOptions_MUMPS(F, A));
1949 
1950   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1951   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1952 
1953   /* analysis phase */
1954   /*----------------*/
1955   mumps->id.job = JOB_FACTSYMBOLIC;
1956   mumps->id.n   = M;
1957   switch (mumps->id.ICNTL(18)) {
1958   case 0: /* centralized assembled matrix input */
1959     if (!mumps->myid) {
1960       mumps->id.nnz = mumps->nnz;
1961       mumps->id.irn = mumps->irn;
1962       mumps->id.jcn = mumps->jcn;
1963       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1964       if (r) {
1965         mumps->id.ICNTL(7) = 1;
1966         if (!mumps->myid) {
1967           const PetscInt *idx;
1968           PetscInt        i;
1969 
1970           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
1971           PetscCall(ISGetIndices(r, &idx));
1972           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1973           PetscCall(ISRestoreIndices(r, &idx));
1974         }
1975       }
1976     }
1977     break;
1978   case 3: /* distributed assembled matrix input (size>1) */
1979     mumps->id.nnz_loc = mumps->nnz;
1980     mumps->id.irn_loc = mumps->irn;
1981     mumps->id.jcn_loc = mumps->jcn;
1982     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
1983     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1984       PetscCall(MatCreateVecs(A, NULL, &b));
1985       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
1986       PetscCall(VecDestroy(&b));
1987     }
1988     break;
1989   }
1990   PetscMUMPS_c(mumps);
1991   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
1992 
1993   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
1994   F->ops->solve             = MatSolve_MUMPS;
1995   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
1996   F->ops->matsolve          = MatMatSolve_MUMPS;
1997   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1998   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
1999 
2000   mumps->matstruc = SAME_NONZERO_PATTERN;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 /* Note the Petsc r and c permutations are ignored */
2005 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
2006   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2007   Vec            b;
2008   const PetscInt M = A->rmap->N;
2009 
2010   PetscFunctionBegin;
2011   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2012     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2013     PetscFunctionReturn(0);
2014   }
2015 
2016   /* Set MUMPS options from the options database */
2017   PetscCall(MatSetFromOptions_MUMPS(F, A));
2018 
2019   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2020   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2021 
2022   /* analysis phase */
2023   /*----------------*/
2024   mumps->id.job = JOB_FACTSYMBOLIC;
2025   mumps->id.n   = M;
2026   switch (mumps->id.ICNTL(18)) {
2027   case 0: /* centralized assembled matrix input */
2028     if (!mumps->myid) {
2029       mumps->id.nnz = mumps->nnz;
2030       mumps->id.irn = mumps->irn;
2031       mumps->id.jcn = mumps->jcn;
2032       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2033     }
2034     break;
2035   case 3: /* distributed assembled matrix input (size>1) */
2036     mumps->id.nnz_loc = mumps->nnz;
2037     mumps->id.irn_loc = mumps->irn;
2038     mumps->id.jcn_loc = mumps->jcn;
2039     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2040     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2041       PetscCall(MatCreateVecs(A, NULL, &b));
2042       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2043       PetscCall(VecDestroy(&b));
2044     }
2045     break;
2046   }
2047   PetscMUMPS_c(mumps);
2048   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2049 
2050   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2051   F->ops->solve             = MatSolve_MUMPS;
2052   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2053   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2054 
2055   mumps->matstruc = SAME_NONZERO_PATTERN;
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 /* Note the Petsc r permutation and factor info are ignored */
2060 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) {
2061   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2062   Vec            b;
2063   const PetscInt M = A->rmap->N;
2064 
2065   PetscFunctionBegin;
2066   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2067     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2068     PetscFunctionReturn(0);
2069   }
2070 
2071   /* Set MUMPS options from the options database */
2072   PetscCall(MatSetFromOptions_MUMPS(F, A));
2073 
2074   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2075   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2076 
2077   /* analysis phase */
2078   /*----------------*/
2079   mumps->id.job = JOB_FACTSYMBOLIC;
2080   mumps->id.n   = M;
2081   switch (mumps->id.ICNTL(18)) {
2082   case 0: /* centralized assembled matrix input */
2083     if (!mumps->myid) {
2084       mumps->id.nnz = mumps->nnz;
2085       mumps->id.irn = mumps->irn;
2086       mumps->id.jcn = mumps->jcn;
2087       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2088     }
2089     break;
2090   case 3: /* distributed assembled matrix input (size>1) */
2091     mumps->id.nnz_loc = mumps->nnz;
2092     mumps->id.irn_loc = mumps->irn;
2093     mumps->id.jcn_loc = mumps->jcn;
2094     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2095     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2096       PetscCall(MatCreateVecs(A, NULL, &b));
2097       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2098       PetscCall(VecDestroy(&b));
2099     }
2100     break;
2101   }
2102   PetscMUMPS_c(mumps);
2103   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2104 
2105   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2106   F->ops->solve                 = MatSolve_MUMPS;
2107   F->ops->solvetranspose        = MatSolve_MUMPS;
2108   F->ops->matsolve              = MatMatSolve_MUMPS;
2109   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2110   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2111 #if defined(PETSC_USE_COMPLEX)
2112   F->ops->getinertia = NULL;
2113 #else
2114   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2115 #endif
2116 
2117   mumps->matstruc = SAME_NONZERO_PATTERN;
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) {
2122   PetscBool         iascii;
2123   PetscViewerFormat format;
2124   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2125 
2126   PetscFunctionBegin;
2127   /* check if matrix is mumps type */
2128   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2129 
2130   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2131   if (iascii) {
2132     PetscCall(PetscViewerGetFormat(viewer, &format));
2133     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2134       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2135       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2136         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2137         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2138         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2139         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2140         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2141         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2142         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2143         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2144         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2145         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2146         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2147         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2148         if (mumps->id.ICNTL(11) > 0) {
2149           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2150           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2151           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2152           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2153           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2154           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2155         }
2156         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2157         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2158         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2159         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2160         /* ICNTL(16), ICNTL(17) not used */
2161         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2162         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2163         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2164         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2165         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2166         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2167 
2168         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2169         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2170         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2171         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2172         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2173         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2174 
2175         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2176         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2177         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2178         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2179         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2180         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2181 
2182         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2183         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2184         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2185         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2186         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2187         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2188 
2189         /* information local to each processor */
2190         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2191         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2192         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2193         PetscCall(PetscViewerFlush(viewer));
2194         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2195         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2196         PetscCall(PetscViewerFlush(viewer));
2197         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2198         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2199         PetscCall(PetscViewerFlush(viewer));
2200 
2201         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2202         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2203         PetscCall(PetscViewerFlush(viewer));
2204 
2205         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2206         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2207         PetscCall(PetscViewerFlush(viewer));
2208 
2209         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2210         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2211         PetscCall(PetscViewerFlush(viewer));
2212 
2213         if (mumps->ninfo && mumps->ninfo <= 80) {
2214           PetscInt i;
2215           for (i = 0; i < mumps->ninfo; i++) {
2216             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2217             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2218             PetscCall(PetscViewerFlush(viewer));
2219           }
2220         }
2221         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2222       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2223 
2224       if (mumps->myid == 0) { /* information from the host */
2225         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2227         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2228         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2229 
2230         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2231         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2264       }
2265     }
2266   }
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) {
2271   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2272 
2273   PetscFunctionBegin;
2274   info->block_size        = 1.0;
2275   info->nz_allocated      = mumps->id.INFOG(20);
2276   info->nz_used           = mumps->id.INFOG(20);
2277   info->nz_unneeded       = 0.0;
2278   info->assemblies        = 0.0;
2279   info->mallocs           = 0.0;
2280   info->memory            = 0.0;
2281   info->fill_ratio_given  = 0;
2282   info->fill_ratio_needed = 0;
2283   info->factor_mallocs    = 0;
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 /* -------------------------------------------------------------------------------------------*/
2288 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) {
2289   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2290   const PetscScalar *arr;
2291   const PetscInt    *idxs;
2292   PetscInt           size, i;
2293 
2294   PetscFunctionBegin;
2295   PetscCall(ISGetLocalSize(is, &size));
2296   /* Schur complement matrix */
2297   PetscCall(MatDestroy(&F->schur));
2298   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2299   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2300   mumps->id.schur      = (MumpsScalar *)arr;
2301   mumps->id.size_schur = size;
2302   mumps->id.schur_lld  = size;
2303   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2304   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2305 
2306   /* MUMPS expects Fortran style indices */
2307   PetscCall(PetscFree(mumps->id.listvar_schur));
2308   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2309   PetscCall(ISGetIndices(is, &idxs));
2310   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2311   PetscCall(ISRestoreIndices(is, &idxs));
2312   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2313   mumps->id.ICNTL(26) = -1;
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /* -------------------------------------------------------------------------------------------*/
2318 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) {
2319   Mat          St;
2320   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2321   PetscScalar *array;
2322 #if defined(PETSC_USE_COMPLEX)
2323   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2324 #endif
2325 
2326   PetscFunctionBegin;
2327   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2328   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2329   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2330   PetscCall(MatSetType(St, MATDENSE));
2331   PetscCall(MatSetUp(St));
2332   PetscCall(MatDenseGetArray(St, &array));
2333   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2334     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2335       PetscInt i, j, N = mumps->id.size_schur;
2336       for (i = 0; i < N; i++) {
2337         for (j = 0; j < N; j++) {
2338 #if !defined(PETSC_USE_COMPLEX)
2339           PetscScalar val = mumps->id.schur[i * N + j];
2340 #else
2341           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2342 #endif
2343           array[j * N + i] = val;
2344         }
2345       }
2346     } else { /* stored by columns */
2347       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2348     }
2349   } else {                          /* either full or lower-triangular (not packed) */
2350     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2351       PetscInt i, j, N = mumps->id.size_schur;
2352       for (i = 0; i < N; i++) {
2353         for (j = i; j < N; j++) {
2354 #if !defined(PETSC_USE_COMPLEX)
2355           PetscScalar val = mumps->id.schur[i * N + j];
2356 #else
2357           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2358 #endif
2359           array[i * N + j] = val;
2360           array[j * N + i] = val;
2361         }
2362       }
2363     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2364       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2365     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2366       PetscInt i, j, N = mumps->id.size_schur;
2367       for (i = 0; i < N; i++) {
2368         for (j = 0; j < i + 1; j++) {
2369 #if !defined(PETSC_USE_COMPLEX)
2370           PetscScalar val = mumps->id.schur[i * N + j];
2371 #else
2372           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2373 #endif
2374           array[i * N + j] = val;
2375           array[j * N + i] = val;
2376         }
2377       }
2378     }
2379   }
2380   PetscCall(MatDenseRestoreArray(St, &array));
2381   *S = St;
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 /* -------------------------------------------------------------------------------------------*/
2386 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) {
2387   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2388 
2389   PetscFunctionBegin;
2390   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
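    /* ICNTL_pre layout: ICNTL_pre[0] holds the number of cached (icntl, ival) pairs; pair i is stored at ICNTL_pre[1 + 2*i] (icntl) and ICNTL_pre[2 + 2*i] (ival) */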
2391     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2392     for (i = 0; i < nICNTL_pre; ++i)
2393       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2394     if (i == nICNTL_pre) {                             /* not already cached */
2395       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2396       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2397       mumps->ICNTL_pre[0]++;
2398     }
2399     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2400     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2401   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) {
2406   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2407 
2408   PetscFunctionBegin;
2409   *ival = mumps->id.ICNTL(icntl);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 /*@
2414   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2415 
2416    Logically Collective on F
2417 
2418    Input Parameters:
2419 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2420 .  icntl - index of MUMPS parameter array ICNTL()
2421 -  ival - value of MUMPS ICNTL(icntl)
2422 
2423   Options Database:
2424 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2425 
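  Example Usage:
  A minimal sketch (A is the matrix to be factored and F the resulting MUMPS factor; ICNTL(14) is the percentage increase in the estimated working space):
$     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
$     MatMumpsSetIcntl(F, 14, 30);  /* increase the estimated working space by 30 percent before factorization */
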
2426    Level: beginner
2427 
2428    References:
2429 .  * - MUMPS Users' Guide
2430 
2431 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2432 @*/
2433 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) {
2434   PetscFunctionBegin;
2435   PetscValidType(F, 1);
2436   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2437   PetscValidLogicalCollectiveInt(F, icntl, 2);
2438   PetscValidLogicalCollectiveInt(F, ival, 3);
2439   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2440   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 /*@
2445   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2446 
2447    Logically Collective on F
2448 
2449    Input Parameters:
2450 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2451 -  icntl - index of MUMPS parameter array ICNTL()
2452 
2453   Output Parameter:
2454 .  ival - value of MUMPS ICNTL(icntl)
2455 
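  Example Usage:
  A minimal sketch (F is a MUMPS factor obtained with MatGetFactor(); ICNTL(24) controls null-pivot detection):
$     PetscInt ival;
$     MatMumpsGetIcntl(F, 24, &ival);  /* query whether null-pivot detection is enabled */
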
2456    Level: beginner
2457 
2458    References:
2459 .  * - MUMPS Users' Guide
2460 
2461 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2462 @*/
2463 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) {
2464   PetscFunctionBegin;
2465   PetscValidType(F, 1);
2466   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2467   PetscValidLogicalCollectiveInt(F, icntl, 2);
2468   PetscValidIntPointer(ival, 3);
2469   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2470   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2471   PetscFunctionReturn(0);
2472 }
2473 
2474 /* -------------------------------------------------------------------------------------------*/
2475 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) {
2476   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2477 
2478   PetscFunctionBegin;
2479   if (mumps->id.job == JOB_NULL) {
2480     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2481     for (i = 0; i < nCNTL_pre; ++i)
2482       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2483     if (i == nCNTL_pre) {
2484       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2485       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2486       mumps->CNTL_pre[0]++;
2487     }
2488     mumps->CNTL_pre[1 + 2 * i] = icntl;
2489     mumps->CNTL_pre[2 + 2 * i] = val;
2490   } else mumps->id.CNTL(icntl) = val;
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) {
2495   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2496 
2497   PetscFunctionBegin;
2498   *val = mumps->id.CNTL(icntl);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 /*@
2503   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2504 
2505    Logically Collective on F
2506 
2507    Input Parameters:
2508 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2509 .  icntl - index of MUMPS parameter array CNTL()
2510 -  val - value of MUMPS CNTL(icntl)
2511 
2512   Options Database:
2513 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2514 
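  Example Usage:
  A minimal sketch (F is a MUMPS factor obtained with MatGetFactor(); CNTL(1) is the relative pivoting threshold):
$     MatMumpsSetCntl(F, 1, 0.1);  /* set the relative pivoting threshold to 0.1 */
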
2515    Level: beginner
2516 
2517    References:
2518 .  * - MUMPS Users' Guide
2519 
2520 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2521 @*/
2522 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) {
2523   PetscFunctionBegin;
2524   PetscValidType(F, 1);
2525   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2526   PetscValidLogicalCollectiveInt(F, icntl, 2);
2527   PetscValidLogicalCollectiveReal(F, val, 3);
2528   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2529   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 /*@
2534   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2535 
2536    Logically Collective on F
2537 
2538    Input Parameters:
2539 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2540 -  icntl - index of MUMPS parameter array CNTL()
2541 
2542   Output Parameter:
2543 .  val - value of MUMPS CNTL(icntl)
2544 
2545    Level: beginner
2546 
2547    References:
2548 .  * - MUMPS Users' Guide
2549 
2550 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2551 @*/
2552 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) {
2553   PetscFunctionBegin;
2554   PetscValidType(F, 1);
2555   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2556   PetscValidLogicalCollectiveInt(F, icntl, 2);
2557   PetscValidRealPointer(val, 3);
2558   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2559   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) {
2564   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2565 
2566   PetscFunctionBegin;
2567   *info = mumps->id.INFO(icntl);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) {
2572   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2573 
2574   PetscFunctionBegin;
2575   *infog = mumps->id.INFOG(icntl);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) {
2580   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2581 
2582   PetscFunctionBegin;
2583   *rinfo = mumps->id.RINFO(icntl);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) {
2588   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2589 
2590   PetscFunctionBegin;
2591   *rinfog = mumps->id.RINFOG(icntl);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) {
2596   Mat          Bt = NULL, Btseq = NULL;
2597   PetscBool    flg;
2598   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2599   PetscScalar *aa;
2600   PetscInt     spnr, *ia, *ja, M, nrhs;
2601 
2602   PetscFunctionBegin;
2603   PetscValidPointer(spRHS, 2);
2604   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2605   if (flg) {
2606     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2607   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2608 
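  /* ICNTL(30) = 1: compute a user-specified set of entries of inv(A) */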
2609   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2610 
2611   if (mumps->petsc_size > 1) {
2612     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2613     Btseq         = b->A;
2614   } else {
2615     Btseq = Bt;
2616   }
2617 
2618   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2619   mumps->id.nrhs = nrhs;
2620   mumps->id.lrhs = M;
2621   mumps->id.rhs  = NULL;
2622 
2623   if (!mumps->myid) {
2624     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2625     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2626     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2627     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2628     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2629   } else {
2630     mumps->id.irhs_ptr    = NULL;
2631     mumps->id.irhs_sparse = NULL;
2632     mumps->id.nz_rhs      = 0;
2633     mumps->id.rhs_sparse  = NULL;
2634   }
2635   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2636   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2637 
2638   /* solve phase */
2639   /*-------------*/
2640   mumps->id.job = JOB_SOLVE;
2641   PetscMUMPS_c(mumps);
2642   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2643 
2644   if (!mumps->myid) {
2645     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2646     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2647     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2648   }
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 /*@
2653   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2654 
2655    Logically Collective on F
2656 
2657    Input Parameters:
2658 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2659 -  spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding specified indices in processor[0]
2660 
2661   Output Parameter:
2662 . spRHS - requested entries of inverse of A
2663 
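  Example Usage:
  A minimal sketch (Bt is a sequential AIJ matrix held on process 0 whose nonzero pattern marks the requested entries of inv(A), and F is the MUMPS factor of A):
$     MatCreateTranspose(Bt, &spRHS);
$     MatMumpsGetInverse(F, spRHS);
$     MatDestroy(&spRHS);
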
2664    Level: beginner
2665 
2666    References:
2667 .  * - MUMPS Users' Guide
2668 
2669 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2670 @*/
2671 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) {
2672   PetscFunctionBegin;
2673   PetscValidType(F, 1);
2674   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2675   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) {
2680   Mat spRHS;
2681 
2682   PetscFunctionBegin;
2683   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2684   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2685   PetscCall(MatDestroy(&spRHS));
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 /*@
2690   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2691 
2692    Logically Collective on F
2693 
2694    Input Parameters:
2695 +  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2696 -  spRHST - sequential sparse matrix in `MATAIJ` format holding specified indices of A^T in processor[0]
2697 
2698   Output Parameter:
2699 . spRHST - requested entries of inverse of A^T
2700 
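  Example Usage:
  A minimal sketch (spRHST is a sequential AIJ matrix held on process 0 whose nonzero pattern marks the requested entries of inv(A^T), and F is the MUMPS factor of A):
$     MatMumpsGetInverseTranspose(F, spRHST);
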
2701    Level: beginner
2702 
2703    References:
2704 .  * - MUMPS Users' Guide
2705 
2706 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2707 @*/
2708 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) {
2709   PetscBool flg;
2710 
2711   PetscFunctionBegin;
2712   PetscValidType(F, 1);
2713   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2714   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2715   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2716 
2717   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 /*@
2722   MatMumpsGetInfo - Get MUMPS parameter INFO()
2723 
2724    Logically Collective on F
2725 
2726    Input Parameters:
2727 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2728 -  icntl - index of MUMPS parameter array INFO()
2729 
2730   Output Parameter:
2731 .  ival - value of MUMPS INFO(icntl)
2732 
2733    Level: beginner
2734 
2735    References:
2736 .  * - MUMPS Users' Guide
2737 
2738 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2739 @*/
2740 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) {
2741   PetscFunctionBegin;
2742   PetscValidType(F, 1);
2743   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2744   PetscValidIntPointer(ival, 3);
2745   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 /*@
2750   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2751 
2752    Logically Collective on F
2753 
2754    Input Parameters:
2755 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2756 -  icntl - index of MUMPS parameter array INFOG()
2757 
2758   Output Parameter:
2759 .  ival - value of MUMPS INFOG(icntl)
2760 
2761    Level: beginner
2762 
2763    References:
2764 .  * - MUMPS Users' Guide
2765 
2766 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2767 @*/
2768 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) {
2769   PetscFunctionBegin;
2770   PetscValidType(F, 1);
2771   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2772   PetscValidIntPointer(ival, 3);
2773   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 /*@
2778   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2779 
2780    Logically Collective on F
2781 
2782    Input Parameters:
2783 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2784 -  icntl - index of MUMPS parameter array RINFO()
2785 
2786   Output Parameter:
2787 .  val - value of MUMPS RINFO(icntl)
2788 
2789    Level: beginner
2790 
2791    References:
2792 .  * - MUMPS Users' Guide
2793 
2794 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2795 @*/
2796 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) {
2797   PetscFunctionBegin;
2798   PetscValidType(F, 1);
2799   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2800   PetscValidRealPointer(val, 3);
2801   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 /*@
2806   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2807 
2808    Logically Collective on F
2809 
2810    Input Parameters:
2811 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2812 -  icntl - index of MUMPS parameter array RINFOG()
2813 
2814   Output Parameter:
2815 .  val - value of MUMPS RINFOG(icntl)
2816 
2817    Level: beginner
2818 
2819    References:
2820 .  * - MUMPS Users' Guide
2821 
2822 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2823 @*/
2824 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) {
2825   PetscFunctionBegin;
2826   PetscValidType(F, 1);
2827   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2828   PetscValidRealPointer(val, 3);
2829   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 /*MC
2834   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2835   distributed and sequential matrices via the external package MUMPS.
2836 
2837   Works with `MATAIJ` and `MATSBAIJ` matrices
2838 
2839   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2840 
2841   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2842 
2843   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2844 
2845   Options Database Keys:
2846 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2847 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2848 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2849 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2850 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2851 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2852                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2853 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2854 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2855 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2856 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2857 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2858 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2859 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2860 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2861 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2862 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2863 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2864 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2865 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2866 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2867 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2868 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2869 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2870 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2871 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2872 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2873 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2874 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2875 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2876 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2877 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2878 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2879 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2880 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2881 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2882                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2883 
2884   Level: beginner
2885 
2886     Notes:
2887     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will produce an error.
2888 
2889     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2890     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
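
    For example (a sketch; the prefix "inner_" is an arbitrary name chosen here):
$          MatSetOptionsPrefixFactor(A,"inner_");
$          /* options such as -inner_mat_mumps_icntl_14 30 now apply to the factor obtained from A */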
2891 
2892     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
2893 $          KSPGetPC(ksp,&pc);
2894 $          PCFactorGetMatrix(pc,&mat);
2895 $          MatMumpsGetInfo(mat,....);
2896 $          MatMumpsGetInfog(mat,....); etc.
2897            Alternatively, run with -ksp_error_if_not_converged, in which case the program stops and the MUMPS information is printed in the error message.
2898 
2899   Using MUMPS with 64-bit integers
2900     MUMPS provides 64-bit integer support in two build modes:
2901       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2902       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2903 
2904       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2905       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2906       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2907       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2908 
2909     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
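
    For example, a PETSc build with 64-bit indices and a selective 64-bit MUMPS can be configured with a line such as (a sketch; adjust the package list to your system):
$       ./configure --with-64-bit-indices=1 --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch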
2910 
2911   Two modes to run MUMPS/PETSc with OpenMP
2912 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2913 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2914 
2915 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2916 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2917 
2918    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2919    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2920    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2921    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2922    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2923 
2924    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2925    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2926    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2927    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (as may be set
2928    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2929    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets:
2930    if you interleave MPI ranks on the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and bind
2931    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half
2932    the cores in socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
2933    problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2934    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2935    examine the mapping result.
2936 
2937    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2938    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
2939    calls `omp_set_num_threads`(m) internally before calling MUMPS.
2940 
2941    References:
2942 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2943 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2944 
2945 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2946 M*/
2947 
2948 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) {
2949   PetscFunctionBegin;
2950   *type = MATSOLVERMUMPS;
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 /* MatGetFactor for Seq and MPI AIJ matrices */
2955 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) {
2956   Mat         B;
2957   Mat_MUMPS  *mumps;
2958   PetscBool   isSeqAIJ;
2959   PetscMPIInt size;
2960 
2961   PetscFunctionBegin;
2962 #if defined(PETSC_USE_COMPLEX)
2963   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
2964 #endif
2965   /* Create the factorization matrix */
2966   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
2967   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2968   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2969   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
2970   PetscCall(MatSetUp(B));
2971 
2972   PetscCall(PetscNewLog(B, &mumps));
2973 
2974   B->ops->view    = MatView_MUMPS;
2975   B->ops->getinfo = MatGetInfo_MUMPS;
2976 
2977   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
2978   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
2979   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
2980   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
2981   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
2982   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
2983   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
2984   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
2985   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
2986   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
2987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
2988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
2989   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
2990 
2991   if (ftype == MAT_FACTOR_LU) {
2992     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
2993     B->factortype            = MAT_FACTOR_LU;
2994     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
2995     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
2996     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
2997     mumps->sym = 0;
2998   } else {
2999     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3000     B->factortype                  = MAT_FACTOR_CHOLESKY;
3001     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3002     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3003     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3004 #if defined(PETSC_USE_COMPLEX)
3005     mumps->sym = 2;
3006 #else
3007     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3008     else mumps->sym = 2;
3009 #endif
3010   }
3011 
3012   /* set solvertype */
3013   PetscCall(PetscFree(B->solvertype));
3014   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3015   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3016   if (size == 1) {
3017     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3018     B->canuseordering = PETSC_TRUE;
3019   }
3020   B->ops->destroy = MatDestroy_MUMPS;
3021   B->data         = (void *)mumps;
3022 
3023   *F               = B;
3024   mumps->id.job    = JOB_NULL;
3025   mumps->ICNTL_pre = NULL;
3026   mumps->CNTL_pre  = NULL;
3027   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3032 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) {
3033   Mat         B;
3034   Mat_MUMPS  *mumps;
3035   PetscBool   isSeqSBAIJ;
3036   PetscMPIInt size;
3037 
3038   PetscFunctionBegin;
3039 #if defined(PETSC_USE_COMPLEX)
3040   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3041 #endif
3042   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3043   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3044   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3045   PetscCall(MatSetUp(B));
3046 
3047   PetscCall(PetscNewLog(B, &mumps));
3048   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3049   if (isSeqSBAIJ) {
3050     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3051   } else {
3052     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3053   }
3054 
3055   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3056   B->ops->view                   = MatView_MUMPS;
3057   B->ops->getinfo                = MatGetInfo_MUMPS;
3058 
3059   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3060   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3061   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3062   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3063   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3064   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3065   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3066   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3067   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3068   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3069   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3071   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3072 
3073   B->factortype = MAT_FACTOR_CHOLESKY;
3074 #if defined(PETSC_USE_COMPLEX)
3075   mumps->sym = 2;
3076 #else
3077   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3078   else mumps->sym = 2;
3079 #endif
3080 
3081   /* set solvertype */
3082   PetscCall(PetscFree(B->solvertype));
3083   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3084   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3085   if (size == 1) {
3086     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3087     B->canuseordering = PETSC_TRUE;
3088   }
3089   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3090   B->ops->destroy = MatDestroy_MUMPS;
3091   B->data         = (void *)mumps;
3092 
3093   *F               = B;
3094   mumps->id.job    = JOB_NULL;
3095   mumps->ICNTL_pre = NULL;
3096   mumps->CNTL_pre  = NULL;
3097   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) {
3102   Mat         B;
3103   Mat_MUMPS  *mumps;
3104   PetscBool   isSeqBAIJ;
3105   PetscMPIInt size;
3106 
3107   PetscFunctionBegin;
3108   /* Create the factorization matrix */
3109   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3110   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3111   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3112   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3113   PetscCall(MatSetUp(B));
3114 
3115   PetscCall(PetscNewLog(B, &mumps));
3116   if (ftype == MAT_FACTOR_LU) {
3117     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3118     B->factortype            = MAT_FACTOR_LU;
3119     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3120     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3121     mumps->sym = 0;
3122     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3123   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3124 
3125   B->ops->view    = MatView_MUMPS;
3126   B->ops->getinfo = MatGetInfo_MUMPS;
3127 
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3134   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3135   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3136   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3137   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3138   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3139   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3140   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3141 
3142   /* set solvertype */
3143   PetscCall(PetscFree(B->solvertype));
3144   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3145   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3146   if (size == 1) {
3147     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3148     B->canuseordering = PETSC_TRUE;
3149   }
3150   B->ops->destroy = MatDestroy_MUMPS;
3151   B->data         = (void *)mumps;
3152 
3153   *F               = B;
3154   mumps->id.job    = JOB_NULL;
3155   mumps->ICNTL_pre = NULL;
3156   mumps->CNTL_pre  = NULL;
3157   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 /* MatGetFactor for Seq and MPI SELL matrices */
3162 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) {
3163   Mat         B;
3164   Mat_MUMPS  *mumps;
3165   PetscBool   isSeqSELL;
3166   PetscMPIInt size;
3167 
3168   PetscFunctionBegin;
3169   /* Create the factorization matrix */
3170   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3171   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3172   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3173   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3174   PetscCall(MatSetUp(B));
3175 
3176   PetscCall(PetscNewLog(B, &mumps));
3177 
3178   B->ops->view    = MatView_MUMPS;
3179   B->ops->getinfo = MatGetInfo_MUMPS;
3180 
3181   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3182   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3183   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3184   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3185   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3186   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3192 
3193   if (ftype == MAT_FACTOR_LU) {
3194     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3195     B->factortype            = MAT_FACTOR_LU;
3196     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3197     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3198     mumps->sym = 0;
3199     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3200   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3201 
3202   /* set solvertype */
3203   PetscCall(PetscFree(B->solvertype));
3204   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3205   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3206   if (size == 1) {
3207     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3208     B->canuseordering = PETSC_TRUE;
3209   }
3210   B->ops->destroy = MatDestroy_MUMPS;
3211   B->data         = (void *)mumps;
3212 
3213   *F               = B;
3214   mumps->id.job    = JOB_NULL;
3215   mumps->ICNTL_pre = NULL;
3216   mumps->CNTL_pre  = NULL;
3217   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) {
3222   PetscFunctionBegin;
3223   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3224   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3225   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3226   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3227   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3228   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3229   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3230   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3231   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3232   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3233   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3234   PetscFunctionReturn(0);
3235 }
3236