xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
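/* These job codes select the MUMPS phase: JOB_INIT initializes an instance, JOB_FACTSYMBOLIC and
   JOB_FACTNUMERIC perform the analysis (symbolic) and numerical factorization steps, JOB_SOLVE runs the
   solve phase (see MatSolve_MUMPS() below), and JOB_END releases the instance (see MatDestroy_MUMPS()) */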
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34 #if defined(PETSC_USE_REAL_SINGLE)
35 #define MUMPS_c cmumps_c
36 #else
37 #define MUMPS_c zmumps_c
38 #endif
39 #else
40 #if defined(PETSC_USE_REAL_SINGLE)
41 #define MUMPS_c smumps_c
42 #else
43 #define MUMPS_c dmumps_c
44 #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48    the number of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention in PetscMPIInt, PetscBLASInt etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore this macro is more reliable */
55 #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
56 #endif
57 #else
58 #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59 #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
60 #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b) {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too large for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
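/* Minimal usage sketch (illustrative only):

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->n, &n));  // the PetscAssert() above catches values that do not fit (in debug builds)

   With 32-bit PetscInt the range check compiles away and the cast is a plain assignment. */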
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) {
79   PetscInt  myval;
80   PetscBool myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
84   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
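/* Usage sketch (the option name and description are only illustrative); inside a PetscOptionsBegin()/PetscOptionsEnd() block:

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
     if (flg) mumps->id.ICNTL(4) = icntl;

   The value is read as a PetscInt and then range-checked into a PetscMUMPSInt. */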
89 
90 /* If using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
97         MUMPS_c(&mumps->id); \
98         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
99       } \
100       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
101       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific   \
102          to each process, so we only Bcast info[1] (an error code) and leave the others (since they do not   \
103          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.       \
104          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.   \
105       */ \
106       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
107       PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
108       PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
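/* Every MUMPS phase below follows the same pattern: set id.job, invoke the wrapper, then check INFOG(1), e.g.
   (taken from the solve path later in this file)

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
 */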
117 
118 /* define MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros so that indices match the MUMPS documentation (which is 1-based) */
130 #define ICNTL(I)  icntl[(I)-1]
131 #define CNTL(I)   cntl[(I)-1]
132 #define INFOG(I)  infog[(I)-1]
133 #define INFO(I)   info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I)  rinfo[(I)-1]
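/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6], so control parameters can be read and set
   with the same 1-based numbering used in the MUMPS manual */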
136 
137 typedef struct Mat_MUMPS Mat_MUMPS;
138 struct Mat_MUMPS {
139 #if defined(PETSC_USE_COMPLEX)
140 #if defined(PETSC_USE_REAL_SINGLE)
141   CMUMPS_STRUC_C id;
142 #else
143   ZMUMPS_STRUC_C id;
144 #endif
145 #else
146 #if defined(PETSC_USE_REAL_SINGLE)
147   SMUMPS_STRUC_C id;
148 #else
149   DMUMPS_STRUC_C id;
150 #endif
151 #endif
152 
153   MatStructure   matstruc;
154   PetscMPIInt    myid, petsc_size;
155   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
156   PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
157   PetscInt64     nnz;             /* number of nonzeros. MUMPS calls this type "selective 64-bit" */
158   PetscMUMPSInt  sym;
159   MPI_Comm       mumps_comm;
160   PetscMUMPSInt *ICNTL_pre;
161   PetscReal     *CNTL_pre;
162   PetscMUMPSInt  ICNTL9_pre;         /* ICNTL(9) value from the previous MatSolve, used to check whether it has changed */
163   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
164   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
165   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
166 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
167   PetscInt    *rhs_nrow, max_nrhs;
168   PetscMPIInt *rhs_recvcounts, *rhs_disps;
169   PetscScalar *rhs_loc, *rhs_recvbuf;
170 #endif
171   Vec            b_seq, x_seq;
172   PetscInt       ninfo, *info; /* which INFO to display */
173   PetscInt       sizeredrhs;
174   PetscScalar   *schur_sol;
175   PetscInt       schur_sizesol;
176   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
177   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
178   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
179 
180   /* stuff used by petsc/mumps OpenMP support*/
181   PetscBool    use_petsc_omp_support;
182   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
183   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
184   PetscInt64  *recvcount;            /* the nnz counts collected on omp_master */
185   PetscMPIInt  tag, omp_comm_size;
186   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
187   MPI_Request *reqs;
188 };
189 
190 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
191    Here, nrow is the number of rows, ia[] is the row pointer, and ja[] holds the column indices.
192  */
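/* Illustrative example: a dense 2x2 block in 1-based CSR form is nrow = 2, ia = {1, 3, 5}, ja = {1, 2, 1, 2},
   giving nnz = ia[nrow] - 1 = 4. With 64-bit PetscInt the ia/ja arrays are copied (with range checks) into the
   PetscMUMPSInt work arrays ia_alloc/ja_alloc below; with 32-bit PetscInt they are passed through unchanged. */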
193 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) {
194   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. PetscInt suffices (instead of PetscInt64) since MUMPS only uses PetscMUMPSInt for the rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
203       mumps->cur_ilen = nrow + 1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
211     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps          = ia;
217   *ja_mumps          = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) {
224   PetscFunctionBegin;
225   PetscCall(PetscFree(mumps->id.listvar_schur));
226   PetscCall(PetscFree(mumps->id.redrhs));
227   PetscCall(PetscFree(mumps->schur_sol));
228   mumps->id.size_schur = 0;
229   mumps->id.schur_lld  = 0;
230   mumps->id.ICNTL(19)  = 0;
231   PetscFunctionReturn(0);
232 }
233 
234 /* solve with rhs in mumps->id.redrhs and return in the same location */
235 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) {
236   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
237   Mat                  S, B, X;
238   MatFactorSchurStatus schurstatus;
239   PetscInt             sizesol;
240 
241   PetscFunctionBegin;
242   PetscCall(MatFactorFactorizeSchurComplement(F));
243   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
244   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
245   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
246 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
247   PetscCall(MatBindToCPU(B, S->boundtocpu));
248 #endif
249   switch (schurstatus) {
250   case MAT_FACTOR_SCHUR_FACTORED: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X)); PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252     PetscCall(MatBindToCPU(X, S->boundtocpu));
253 #endif
254     if (!mumps->id.ICNTL(9)) { /* transpose solve */
255       PetscCall(MatMatSolveTranspose(S, B, X));
256     } else {
257       PetscCall(MatMatSolve(S, B, X));
258     }
259     break;
260   case MAT_FACTOR_SCHUR_INVERTED:
261     sizesol = mumps->id.nrhs * mumps->id.size_schur;
262     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
263       PetscCall(PetscFree(mumps->schur_sol));
264       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
265       mumps->schur_sizesol = sizesol;
266     }
267     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
268     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
269 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
270     PetscCall(MatBindToCPU(X, S->boundtocpu));
271 #endif
272     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
273     if (!mumps->id.ICNTL(9)) { /* transpose solve */
274       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
275     } else {
276       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
277     }
278     PetscCall(MatProductSetFromOptions(X));
279     PetscCall(MatProductSymbolic(X));
280     PetscCall(MatProductNumeric(X));
281 
282     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
283     break;
284   default: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
285   }
286   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
287   PetscCall(MatDestroy(&B));
288   PetscCall(MatDestroy(&X));
289   PetscFunctionReturn(0);
290 }
291 
292 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) {
293   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
294 
295   PetscFunctionBegin;
296   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
297     PetscFunctionReturn(0);
298   }
299   if (!expansion) { /* prepare for the condensation step */
300     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
301     /* allocate MUMPS internal array to store reduced right-hand sides */
302     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
303       PetscCall(PetscFree(mumps->id.redrhs));
304       mumps->id.lredrhs = mumps->id.size_schur;
305       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
306       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
307     }
308     mumps->id.ICNTL(26) = 1; /* condensation phase */
309   } else {                   /* prepare for the expansion step */
310     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
311     PetscCall(MatMumpsSolveSchur_Private(F));
312     mumps->id.ICNTL(26) = 2; /* expansion phase */
313     PetscMUMPS_c(mumps);
314     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
315     /* restore defaults */
316     mumps->id.ICNTL(26) = -1;
317     /* to save memory, free the MUMPS internal redrhs array if we have solved for multiple rhs */
318     if (mumps->id.nrhs > 1) {
319       PetscCall(PetscFree(mumps->id.redrhs));
320       mumps->id.lredrhs = 0;
321       mumps->sizeredrhs = 0;
322     }
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 /*
328   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
329 
330   input:
331     A       - matrix in aij, baij, or sbaij format
332     shift   - 0: C style output triples; 1: Fortran style output triples.
333     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
334               MAT_REUSE_MATRIX:   only the values in the v array are updated
335   output:
336     nnz     - dim of r, c, and v (number of local nonzero entries of A)
337     r, c, v - row and column indices and matrix values (matrix triples)
338 
339   The index arrays r and c are obtained with a single PetscMalloc2() and freed with PetscFree2(mumps->irn, mumps->jcn)
340   in MatDestroy_MUMPS(); the value array v either points directly at the matrix data or is allocated separately into
341   mumps->val_alloc, which is freed with PetscFree(mumps->val_alloc).
342 
343  */
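/* Illustrative example (not taken from the library): with shift = 1 (Fortran-style indices), the 2x2 AIJ matrix
     [ 2 0 ]
     [ 3 4 ]
   yields nnz = 3 and the triples
     r = {1, 2, 2},  c = {1, 1, 2},  v = {2.0, 3.0, 4.0}
 */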
344 
345 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
346   const PetscScalar *av;
347   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
348   PetscInt64         nz, rnz, i, j, k;
349   PetscMUMPSInt     *row, *col;
350   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
351 
352   PetscFunctionBegin;
353   PetscCall(MatSeqAIJGetArrayRead(A, &av));
354   mumps->val = (PetscScalar *)av;
355   if (reuse == MAT_INITIAL_MATRIX) {
356     nz = aa->nz;
357     ai = aa->i;
358     aj = aa->j;
359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
360     for (i = k = 0; i < M; i++) {
361       rnz = ai[i + 1] - ai[i];
362       ajj = aj + ai[i];
363       for (j = 0; j < rnz; j++) {
364         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
365         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
366         k++;
367       }
368     }
369     mumps->irn = row;
370     mumps->jcn = col;
371     mumps->nnz = nz;
372   }
373   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
374   PetscFunctionReturn(0);
375 }
376 
377 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
378   PetscInt64     nz, i, j, k, r;
379   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
380   PetscMUMPSInt *row, *col;
381 
382   PetscFunctionBegin;
383   mumps->val = a->val;
384   if (reuse == MAT_INITIAL_MATRIX) {
385     nz = a->sliidx[a->totalslices];
386     PetscCall(PetscMalloc2(nz, &row, nz, &col));
387     for (i = k = 0; i < a->totalslices; i++) {
388       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) { PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); }
389     }
390     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
391     mumps->irn = row;
392     mumps->jcn = col;
393     mumps->nnz = nz;
394   }
395   PetscFunctionReturn(0);
396 }
397 
398 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
399   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
400   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
401   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
402   PetscInt        bs;
403   PetscMUMPSInt  *row, *col;
404 
405   PetscFunctionBegin;
406   PetscCall(MatGetBlockSize(A, &bs));
407   M          = A->rmap->N / bs;
408   mumps->val = aa->a;
409   if (reuse == MAT_INITIAL_MATRIX) {
410     ai = aa->i;
411     aj = aa->j;
412     nz = bs2 * aa->nz;
413     PetscCall(PetscMalloc2(nz, &row, nz, &col));
414     for (i = 0; i < M; i++) {
415       ajj = aj + ai[i];
416       rnz = ai[i + 1] - ai[i];
417       for (k = 0; k < rnz; k++) {
418         for (j = 0; j < bs; j++) {
419           for (m = 0; m < bs; m++) {
420             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
421             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
422             idx++;
423           }
424         }
425       }
426     }
427     mumps->irn = row;
428     mumps->jcn = col;
429     mumps->nnz = nz;
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
435   const PetscInt *ai, *aj, *ajj;
436   PetscInt        bs;
437   PetscInt64      nz, rnz, i, j, k, m;
438   PetscMUMPSInt  *row, *col;
439   PetscScalar    *val;
440   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
441   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
442 #if defined(PETSC_USE_COMPLEX)
443   PetscBool isset, hermitian;
444 #endif
445 
446   PetscFunctionBegin;
447 #if defined(PETSC_USE_COMPLEX)
448   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
449   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
450 #endif
451   ai = aa->i;
452   aj = aa->j;
453   PetscCall(MatGetBlockSize(A, &bs));
454   if (reuse == MAT_INITIAL_MATRIX) {
455     nz = aa->nz;
456     PetscCall(PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col));
457     if (bs > 1) {
458       PetscCall(PetscMalloc1(bs2 * nz, &mumps->val_alloc));
459       mumps->val = mumps->val_alloc;
460     } else {
461       mumps->val = aa->a;
462     }
463     mumps->irn = row;
464     mumps->jcn = col;
465   } else {
466     if (bs == 1) mumps->val = aa->a;
467     row = mumps->irn;
468     col = mumps->jcn;
469   }
470   val = mumps->val;
471 
472   nz = 0;
473   if (bs > 1) {
474     for (i = 0; i < mbs; i++) {
475       rnz = ai[i + 1] - ai[i];
476       ajj = aj + ai[i];
477       for (j = 0; j < rnz; j++) {
478         for (k = 0; k < bs; k++) {
479           for (m = 0; m < bs; m++) {
480             if (ajj[j] > i || k >= m) {
481               if (reuse == MAT_INITIAL_MATRIX) {
482                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
483                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
484               }
485               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
486             }
487           }
488         }
489       }
490     }
491   } else if (reuse == MAT_INITIAL_MATRIX) {
492     for (i = 0; i < mbs; i++) {
493       rnz = ai[i + 1] - ai[i];
494       ajj = aj + ai[i];
495       for (j = 0; j < rnz; j++) {
496         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
497         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
498         nz++;
499       }
500     }
501     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
502   }
503   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
504   PetscFunctionReturn(0);
505 }
506 
507 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
508   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
509   PetscInt64         nz, rnz, i, j;
510   const PetscScalar *av, *v1;
511   PetscScalar       *val;
512   PetscMUMPSInt     *row, *col;
513   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
514   PetscBool          missing;
515 #if defined(PETSC_USE_COMPLEX)
516   PetscBool hermitian, isset;
517 #endif
518 
519   PetscFunctionBegin;
520 #if defined(PETSC_USE_COMPLEX)
521   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
522   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
523 #endif
524   PetscCall(MatSeqAIJGetArrayRead(A, &av));
525   ai    = aa->i;
526   aj    = aa->j;
527   adiag = aa->diag;
528   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
529   if (reuse == MAT_INITIAL_MATRIX) {
530     /* count nz in the upper triangular part of A */
531     nz = 0;
532     if (missing) {
533       for (i = 0; i < M; i++) {
534         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
535           for (j = ai[i]; j < ai[i + 1]; j++) {
536             if (aj[j] < i) continue;
537             nz++;
538           }
539         } else {
540           nz += ai[i + 1] - adiag[i];
541         }
542       }
543     } else {
544       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
545     }
546     PetscCall(PetscMalloc2(nz, &row, nz, &col));
547     PetscCall(PetscMalloc1(nz, &val));
548     mumps->nnz = nz;
549     mumps->irn = row;
550     mumps->jcn = col;
551     mumps->val = mumps->val_alloc = val;
552 
553     nz = 0;
554     if (missing) {
555       for (i = 0; i < M; i++) {
556         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
557           for (j = ai[i]; j < ai[i + 1]; j++) {
558             if (aj[j] < i) continue;
559             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
560             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
561             val[nz] = av[j];
562             nz++;
563           }
564         } else {
565           rnz = ai[i + 1] - adiag[i];
566           ajj = aj + adiag[i];
567           v1  = av + adiag[i];
568           for (j = 0; j < rnz; j++) {
569             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
570             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
571             val[nz++] = v1[j];
572           }
573         }
574       }
575     } else {
576       for (i = 0; i < M; i++) {
577         rnz = ai[i + 1] - adiag[i];
578         ajj = aj + adiag[i];
579         v1  = av + adiag[i];
580         for (j = 0; j < rnz; j++) {
581           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
582           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
583           val[nz++] = v1[j];
584         }
585       }
586     }
587   } else {
588     nz  = 0;
589     val = mumps->val;
590     if (missing) {
591       for (i = 0; i < M; i++) {
592         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
593           for (j = ai[i]; j < ai[i + 1]; j++) {
594             if (aj[j] < i) continue;
595             val[nz++] = av[j];
596           }
597         } else {
598           rnz = ai[i + 1] - adiag[i];
599           v1  = av + adiag[i];
600           for (j = 0; j < rnz; j++) { val[nz++] = v1[j]; }
601         }
602       }
603     } else {
604       for (i = 0; i < M; i++) {
605         rnz = ai[i + 1] - adiag[i];
606         v1  = av + adiag[i];
607         for (j = 0; j < rnz; j++) { val[nz++] = v1[j]; }
608       }
609     }
610   }
611   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
612   PetscFunctionReturn(0);
613 }
614 
615 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
616   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
617   PetscInt           bs;
618   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
619   PetscMUMPSInt     *row, *col;
620   const PetscScalar *av, *bv, *v1, *v2;
621   PetscScalar       *val;
622   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
623   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
624   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
625   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
626 #if defined(PETSC_USE_COMPLEX)
627   PetscBool hermitian, isset;
628 #endif
629 
630   PetscFunctionBegin;
631 #if defined(PETSC_USE_COMPLEX)
632   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
633   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
634 #endif
635   PetscCall(MatGetBlockSize(A, &bs));
636   rstart = A->rmap->rstart;
637   ai     = aa->i;
638   aj     = aa->j;
639   bi     = bb->i;
640   bj     = bb->j;
641   av     = aa->a;
642   bv     = bb->a;
643 
644   garray = mat->garray;
645 
646   if (reuse == MAT_INITIAL_MATRIX) {
647     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
648     PetscCall(PetscMalloc2(nz, &row, nz, &col));
649     PetscCall(PetscMalloc1(nz, &val));
650     /* cannot determine the exact mumps->nnz yet because of the SBAIJ structure; it is set below after the entries are filled in */
651     mumps->irn = row;
652     mumps->jcn = col;
653     mumps->val = mumps->val_alloc = val;
654   } else {
655     val = mumps->val;
656   }
657 
658   jj   = 0;
659   irow = rstart;
660   for (i = 0; i < mbs; i++) {
661     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
662     countA = ai[i + 1] - ai[i];
663     countB = bi[i + 1] - bi[i];
664     bjj    = bj + bi[i];
665     v1     = av + ai[i] * bs2;
666     v2     = bv + bi[i] * bs2;
667 
668     if (bs > 1) {
669       /* A-part */
670       for (j = 0; j < countA; j++) {
671         for (k = 0; k < bs; k++) {
672           for (m = 0; m < bs; m++) {
673             if (rstart + ajj[j] * bs > irow || k >= m) {
674               if (reuse == MAT_INITIAL_MATRIX) {
675                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
676                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
677               }
678               val[jj++] = v1[j * bs2 + m + k * bs];
679             }
680           }
681         }
682       }
683 
684       /* B-part */
685       for (j = 0; j < countB; j++) {
686         for (k = 0; k < bs; k++) {
687           for (m = 0; m < bs; m++) {
688             if (reuse == MAT_INITIAL_MATRIX) {
689               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
690               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
691             }
692             val[jj++] = v2[j * bs2 + m + k * bs];
693           }
694         }
695       }
696     } else {
697       /* A-part */
698       for (j = 0; j < countA; j++) {
699         if (reuse == MAT_INITIAL_MATRIX) {
700           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
701           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
702         }
703         val[jj++] = v1[j];
704       }
705 
706       /* B-part */
707       for (j = 0; j < countB; j++) {
708         if (reuse == MAT_INITIAL_MATRIX) {
709           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
710           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
711         }
712         val[jj++] = v2[j];
713       }
714     }
715     irow += bs;
716   }
717   mumps->nnz = jj;
718   PetscFunctionReturn(0);
719 }
720 
721 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
722   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
723   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
724   PetscMUMPSInt     *row, *col;
725   const PetscScalar *av, *bv, *v1, *v2;
726   PetscScalar       *val;
727   Mat                Ad, Ao;
728   Mat_SeqAIJ        *aa;
729   Mat_SeqAIJ        *bb;
730 
731   PetscFunctionBegin;
732   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
733   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
734   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
735 
736   aa = (Mat_SeqAIJ *)(Ad)->data;
737   bb = (Mat_SeqAIJ *)(Ao)->data;
738   ai = aa->i;
739   aj = aa->j;
740   bi = bb->i;
741   bj = bb->j;
742 
743   rstart = A->rmap->rstart;
744 
745   if (reuse == MAT_INITIAL_MATRIX) {
746     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
747     PetscCall(PetscMalloc2(nz, &row, nz, &col));
748     PetscCall(PetscMalloc1(nz, &val));
749     mumps->nnz = nz;
750     mumps->irn = row;
751     mumps->jcn = col;
752     mumps->val = mumps->val_alloc = val;
753   } else {
754     val = mumps->val;
755   }
756 
757   jj   = 0;
758   irow = rstart;
759   for (i = 0; i < m; i++) {
760     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
761     countA = ai[i + 1] - ai[i];
762     countB = bi[i + 1] - bi[i];
763     bjj    = bj + bi[i];
764     v1     = av + ai[i];
765     v2     = bv + bi[i];
766 
767     /* A-part */
768     for (j = 0; j < countA; j++) {
769       if (reuse == MAT_INITIAL_MATRIX) {
770         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
771         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
772       }
773       val[jj++] = v1[j];
774     }
775 
776     /* B-part */
777     for (j = 0; j < countB; j++) {
778       if (reuse == MAT_INITIAL_MATRIX) {
779         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
780         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
781       }
782       val[jj++] = v2[j];
783     }
784     irow++;
785   }
786   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
787   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
788   PetscFunctionReturn(0);
789 }
790 
791 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
792   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
793   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
794   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
795   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
796   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
797   const PetscInt     bs2 = mat->bs2;
798   PetscInt           bs;
799   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
800   PetscMUMPSInt     *row, *col;
801   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
802   PetscScalar       *val;
803 
804   PetscFunctionBegin;
805   PetscCall(MatGetBlockSize(A, &bs));
806   if (reuse == MAT_INITIAL_MATRIX) {
807     nz = bs2 * (aa->nz + bb->nz);
808     PetscCall(PetscMalloc2(nz, &row, nz, &col));
809     PetscCall(PetscMalloc1(nz, &val));
810     mumps->nnz = nz;
811     mumps->irn = row;
812     mumps->jcn = col;
813     mumps->val = mumps->val_alloc = val;
814   } else {
815     val = mumps->val;
816   }
817 
818   jj   = 0;
819   irow = rstart;
820   for (i = 0; i < mbs; i++) {
821     countA = ai[i + 1] - ai[i];
822     countB = bi[i + 1] - bi[i];
823     ajj    = aj + ai[i];
824     bjj    = bj + bi[i];
825     v1     = av + bs2 * ai[i];
826     v2     = bv + bs2 * bi[i];
827 
828     idx = 0;
829     /* A-part */
830     for (k = 0; k < countA; k++) {
831       for (j = 0; j < bs; j++) {
832         for (n = 0; n < bs; n++) {
833           if (reuse == MAT_INITIAL_MATRIX) {
834             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
835             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
836           }
837           val[jj++] = v1[idx++];
838         }
839       }
840     }
841 
842     idx = 0;
843     /* B-part */
844     for (k = 0; k < countB; k++) {
845       for (j = 0; j < bs; j++) {
846         for (n = 0; n < bs; n++) {
847           if (reuse == MAT_INITIAL_MATRIX) {
848             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
849             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
850           }
851           val[jj++] = v2[idx++];
852         }
853       }
854     }
855     irow += bs;
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) {
861   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
862   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
863   PetscMUMPSInt     *row, *col;
864   const PetscScalar *av, *bv, *v1, *v2;
865   PetscScalar       *val;
866   Mat                Ad, Ao;
867   Mat_SeqAIJ        *aa;
868   Mat_SeqAIJ        *bb;
869 #if defined(PETSC_USE_COMPLEX)
870   PetscBool hermitian, isset;
871 #endif
872 
873   PetscFunctionBegin;
874 #if defined(PETSC_USE_COMPLEX)
875   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
876   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
877 #endif
878   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
879   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
880   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
881 
882   aa    = (Mat_SeqAIJ *)(Ad)->data;
883   bb    = (Mat_SeqAIJ *)(Ao)->data;
884   ai    = aa->i;
885   aj    = aa->j;
886   adiag = aa->diag;
887   bi    = bb->i;
888   bj    = bb->j;
889 
890   rstart = A->rmap->rstart;
891 
892   if (reuse == MAT_INITIAL_MATRIX) {
893     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
894     nzb = 0; /* num of upper triangular entries in mat->B */
895     for (i = 0; i < m; i++) {
896       nza += (ai[i + 1] - adiag[i]);
897       countB = bi[i + 1] - bi[i];
898       bjj    = bj + bi[i];
899       for (j = 0; j < countB; j++) {
900         if (garray[bjj[j]] > rstart) nzb++;
901       }
902     }
903 
904     nz = nza + nzb; /* total nz of upper triangular part of mat */
905     PetscCall(PetscMalloc2(nz, &row, nz, &col));
906     PetscCall(PetscMalloc1(nz, &val));
907     mumps->nnz = nz;
908     mumps->irn = row;
909     mumps->jcn = col;
910     mumps->val = mumps->val_alloc = val;
911   } else {
912     val = mumps->val;
913   }
914 
915   jj   = 0;
916   irow = rstart;
917   for (i = 0; i < m; i++) {
918     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
919     v1     = av + adiag[i];
920     countA = ai[i + 1] - adiag[i];
921     countB = bi[i + 1] - bi[i];
922     bjj    = bj + bi[i];
923     v2     = bv + bi[i];
924 
925     /* A-part */
926     for (j = 0; j < countA; j++) {
927       if (reuse == MAT_INITIAL_MATRIX) {
928         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
929         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
930       }
931       val[jj++] = v1[j];
932     }
933 
934     /* B-part */
935     for (j = 0; j < countB; j++) {
936       if (garray[bjj[j]] > rstart) {
937         if (reuse == MAT_INITIAL_MATRIX) {
938           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
939           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
940         }
941         val[jj++] = v2[j];
942       }
943     }
944     irow++;
945   }
946   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
947   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatDestroy_MUMPS(Mat A) {
952   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
953 
954   PetscFunctionBegin;
955   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
956   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
957   PetscCall(VecScatterDestroy(&mumps->scat_sol));
958   PetscCall(VecDestroy(&mumps->b_seq));
959   PetscCall(VecDestroy(&mumps->x_seq));
960   PetscCall(PetscFree(mumps->id.perm_in));
961   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
962   PetscCall(PetscFree(mumps->val_alloc));
963   PetscCall(PetscFree(mumps->info));
964   PetscCall(PetscFree(mumps->ICNTL_pre));
965   PetscCall(PetscFree(mumps->CNTL_pre));
966   PetscCall(MatMumpsResetSchur_Private(mumps));
967   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
968     mumps->id.job = JOB_END;
969     PetscMUMPS_c(mumps);
970     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
971     if (mumps->mumps_comm != MPI_COMM_NULL) {
972       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
973       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
974     }
975   }
976 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
977   if (mumps->use_petsc_omp_support) {
978     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
979     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
980     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
981   }
982 #endif
983   PetscCall(PetscFree(mumps->ia_alloc));
984   PetscCall(PetscFree(mumps->ja_alloc));
985   PetscCall(PetscFree(mumps->recvcount));
986   PetscCall(PetscFree(mumps->reqs));
987   PetscCall(PetscFree(mumps->irhs_loc));
988   PetscCall(PetscFree(A->data));
989 
990   /* clear composed functions */
991   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
992   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
993   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
994   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
995   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
996   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
997   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
998   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
999   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1008 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) {
1009   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1010   const PetscMPIInt ompsize = mumps->omp_comm_size;
1011   PetscInt          i, m, M, rstart;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(MatGetSize(A, &M, NULL));
1015   PetscCall(MatGetLocalSize(A, &m, NULL));
1016   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too large for PetscMUMPSInt");
1017   if (ompsize == 1) {
1018     if (!mumps->irhs_loc) {
1019       mumps->nloc_rhs = m;
1020       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1021       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1022       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1023     }
1024     mumps->id.rhs_loc = (MumpsScalar *)array;
1025   } else {
1026 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1027     const PetscInt *ranges;
1028     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1029     MPI_Group       petsc_group, omp_group;
1030     PetscScalar    *recvbuf = NULL;
1031 
1032     if (mumps->is_omp_master) {
1033       /* Lazily initialize the omp stuff for distributed rhs */
1034       if (!mumps->irhs_loc) {
1035         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1036         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1037         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1038         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1039         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1040         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1041 
1042         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1043         mumps->nloc_rhs = 0;
1044         PetscCall(MatGetOwnershipRanges(A, &ranges));
1045         for (j = 0; j < ompsize; j++) {
1046           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1047           mumps->nloc_rhs += mumps->rhs_nrow[j];
1048         }
1049         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1050         for (j = k = 0; j < ompsize; j++) {
1051           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1052         }
1053 
1054         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1055         PetscCallMPI(MPI_Group_free(&petsc_group));
1056         PetscCallMPI(MPI_Group_free(&omp_group));
1057       }
1058 
1059       /* Realloc buffers when the current nrhs is bigger than any we have encountered so far */
1060       if (nrhs > mumps->max_nrhs) {
1061         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1062         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1063         mumps->max_nrhs = nrhs;
1064       }
1065 
1066       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1067       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1068       mumps->rhs_disps[0] = 0;
1069       for (j = 1; j < ompsize; j++) {
1070         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1071         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1072       }
1073       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1074     }
1075 
1076     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1077     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1078 
1079     if (mumps->is_omp_master) {
1080       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1081         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1082         for (j = 0; j < ompsize; j++) {
1083           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1084           dst                    = dstbase;
1085           for (i = 0; i < nrhs; i++) {
1086             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1087             src += mumps->rhs_nrow[j];
1088             dst += mumps->nloc_rhs;
1089           }
1090           dstbase += mumps->rhs_nrow[j];
1091         }
1092       }
1093       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1094     }
1095 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1096   }
1097   mumps->id.nrhs     = nrhs;
1098   mumps->id.nloc_rhs = mumps->nloc_rhs;
1099   mumps->id.lrhs_loc = mumps->nloc_rhs;
1100   mumps->id.irhs_loc = mumps->irhs_loc;
1101   PetscFunctionReturn(0);
1102 }
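/* For instance, without OpenMP support (ompsize == 1) and two MPI ranks each owning two rows, rank 1 ends up with
   irhs_loc = {3, 4} (1-based global row indices) and rhs_loc pointing at its local part of the dense RHS. */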
1103 
1104 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) {
1105   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1106   const PetscScalar *rarray = NULL;
1107   PetscScalar       *array;
1108   IS                 is_iden, is_petsc;
1109   PetscInt           i;
1110   PetscBool          second_solve = PETSC_FALSE;
1111   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1112 
1113   PetscFunctionBegin;
1114   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1115                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1116                                    &cite1));
1117   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1118                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1119                                    &cite2));
1120 
1121   if (A->factorerrortype) {
1122     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1123     PetscCall(VecSetInf(x));
1124     PetscFunctionReturn(0);
1125   }
1126 
1127   mumps->id.nrhs = 1;
1128   if (mumps->petsc_size > 1) {
1129     if (mumps->ICNTL20 == 10) {
1130       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1131       PetscCall(VecGetArrayRead(b, &rarray));
1132       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1133     } else {
1134       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1135       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1136       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1137       if (!mumps->myid) {
1138         PetscCall(VecGetArray(mumps->b_seq, &array));
1139         mumps->id.rhs = (MumpsScalar *)array;
1140       }
1141     }
1142   } else {                   /* petsc_size == 1 */
1143     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1144     PetscCall(VecCopy(b, x));
1145     PetscCall(VecGetArray(x, &array));
1146     mumps->id.rhs = (MumpsScalar *)array;
1147   }
1148 
1149   /*
1150      handle condensation step of Schur complement (if any)
1151      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1152      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1153      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1154      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1155   */
1156   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1157     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1158     second_solve = PETSC_TRUE;
1159     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1160   }
1161   /* solve phase */
1162   /*-------------*/
1163   mumps->id.job = JOB_SOLVE;
1164   PetscMUMPS_c(mumps);
1165   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1166 
1167   /* handle expansion step of Schur complement (if any) */
1168   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1169 
1170   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1171     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1172       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1173       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1174     }
1175     if (!mumps->scat_sol) { /* create scatter scat_sol */
1176       PetscInt *isol2_loc = NULL;
1177       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1178       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1179       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1180       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1181       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1182       PetscCall(ISDestroy(&is_iden));
1183       PetscCall(ISDestroy(&is_petsc));
1184       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1185     }
1186 
1187     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1188     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1189   }
1190 
1191   if (mumps->petsc_size > 1) {
1192     if (mumps->ICNTL20 == 10) {
1193       PetscCall(VecRestoreArrayRead(b, &rarray));
1194     } else if (!mumps->myid) {
1195       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1196     }
1197   } else PetscCall(VecRestoreArray(x, &array));
1198 
1199   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) {
1204   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1205 
1206   PetscFunctionBegin;
1207   mumps->id.ICNTL(9) = 0;
1208   PetscCall(MatSolve_MUMPS(A, b, x));
1209   mumps->id.ICNTL(9) = 1;
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) {
1214   Mat                Bt = NULL;
1215   PetscBool          denseX, denseB, flg, flgT;
1216   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1217   PetscInt           i, nrhs, M;
1218   PetscScalar       *array;
1219   const PetscScalar *rbray;
1220   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1221   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1222   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1223   IS                 is_to, is_from;
1224   PetscInt           k, proc, j, m, myrstart;
1225   const PetscInt    *rstart;
1226   Vec                v_mpi, msol_loc;
1227   VecScatter         scat_sol;
1228   Vec                b_seq;
1229   VecScatter         scat_rhs;
1230   PetscScalar       *aa;
1231   PetscInt           spnr, *ia, *ja;
1232   Mat_MPIAIJ        *b = NULL;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1236   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be a MATDENSE matrix");
1237 
1238   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1239   if (denseB) {
1240     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have the same row distribution");
1241     mumps->id.ICNTL(20) = 0; /* dense RHS */
1242   } else {                   /* sparse B */
1243     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1244     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEMAT, &flgT));
1245     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1246                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1247       PetscCall(MatTransposeGetMat(B, &Bt));
1248     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be a MATTRANSPOSEMAT matrix");
1249     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1250   }
1251 
1252   PetscCall(MatGetSize(B, &M, &nrhs));
1253   mumps->id.nrhs = nrhs;
1254   mumps->id.lrhs = M;
1255   mumps->id.rhs  = NULL;
1256 
1257   if (mumps->petsc_size == 1) {
1258     PetscScalar *aa;
1259     PetscInt     spnr, *ia, *ja;
1260     PetscBool    second_solve = PETSC_FALSE;
1261 
1262     PetscCall(MatDenseGetArray(X, &array));
1263     mumps->id.rhs = (MumpsScalar *)array;
1264 
1265     if (denseB) {
1266       /* copy B to X */
1267       PetscCall(MatDenseGetArrayRead(B, &rbray));
1268       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1269       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1270     } else { /* sparse B */
1271       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1272       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1273       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1274       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1275       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1276     }
1277     /* handle condensation step of Schur complement (if any) */
1278     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1279       second_solve = PETSC_TRUE;
1280       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1281     }
1282     /* solve phase */
1283     /*-------------*/
1284     mumps->id.job = JOB_SOLVE;
1285     PetscMUMPS_c(mumps);
1286     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1287 
1288     /* handle expansion step of Schur complement (if any) */
1289     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1290     if (!denseB) { /* sparse B */
1291       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1292       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1293       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1294     }
1295     PetscCall(MatDenseRestoreArray(X, &array));
1296     PetscFunctionReturn(0);
1297   }
1298 
1299   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1300   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1301 
1302   /* create msol_loc to hold mumps local solution */
1303   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1304   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1305 
1306   lsol_loc  = mumps->id.lsol_loc;
1307   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1308   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1309   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1310   mumps->id.isol_loc = isol_loc;
1311 
1312   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1313 
1314   if (denseB) {
1315     if (mumps->ICNTL20 == 10) {
1316       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1317       PetscCall(MatDenseGetArrayRead(B, &rbray));
1318       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1319       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1320       PetscCall(MatGetLocalSize(B, &m, NULL));
1321       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1322     } else {
1323       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1324       /* TODO: Because of the non-contiguous indices, the VecScatter scat_rhs created here cannot be done with MPI_Gather, resulting in
1325         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0, then on rank
1326         0 re-arrange B into the desired order, which is a local operation.
1327       */
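      /* A minimal sketch of that optimization (not implemented here), assuming a Vec v that wraps B's local array:
           VecScatterCreateToZero(v, &tozero, &bseq);
           VecScatterBegin(tozero, v, bseq, INSERT_VALUES, SCATTER_FORWARD);
           VecScatterEnd(tozero, v, bseq, INSERT_VALUES, SCATTER_FORWARD);
         followed on rank 0 by a purely local permutation of bseq into the column-major order MUMPS expects. */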
1328 
1329       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1330       /* wrap dense rhs matrix B into a vector v_mpi */
1331       PetscCall(MatGetLocalSize(B, &m, NULL));
1332       PetscCall(MatDenseGetArray(B, &bray));
1333       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1334       PetscCall(MatDenseRestoreArray(B, &bray));
1335 
1336       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1337       if (!mumps->myid) {
1338         PetscInt *idx;
1339         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1340         PetscCall(PetscMalloc1(nrhs * M, &idx));
1341         PetscCall(MatGetOwnershipRanges(B, &rstart));
1342         k = 0;
1343         for (proc = 0; proc < mumps->petsc_size; proc++) {
1344           for (j = 0; j < nrhs; j++) {
1345             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1346           }
1347         }
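        /* Example (illustration only): M=4, two ranks owning rows {0,1} and {2,3}, nrhs=2. v_mpi holds
           [B(0,0) B(1,0) B(0,1) B(1,1) | B(2,0) B(3,0) B(2,1) B(3,1)] and idx = {0,1,4,5, 2,3,6,7}, so the
           scatter below fills b_seq in global column-major order B(0,0) B(1,0) B(2,0) B(3,0) B(0,1) ... */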
1348 
1349         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1350         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1351         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1352       } else {
1353         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1354         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1355         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1356       }
1357       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1358       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1359       PetscCall(ISDestroy(&is_to));
1360       PetscCall(ISDestroy(&is_from));
1361       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1362 
1363       if (!mumps->myid) { /* define rhs on the host */
1364         PetscCall(VecGetArray(b_seq, &bray));
1365         mumps->id.rhs = (MumpsScalar *)bray;
1366         PetscCall(VecRestoreArray(b_seq, &bray));
1367       }
1368     }
1369   } else { /* sparse B */
1370     b = (Mat_MPIAIJ *)Bt->data;
1371 
1372     /* wrap dense X into a vector v_mpi */
1373     PetscCall(MatGetLocalSize(X, &m, NULL));
1374     PetscCall(MatDenseGetArray(X, &bray));
1375     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1376     PetscCall(MatDenseRestoreArray(X, &bray));
1377 
1378     if (!mumps->myid) {
1379       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1380       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1381       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1382       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1383       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1384     } else {
1385       mumps->id.irhs_ptr    = NULL;
1386       mumps->id.irhs_sparse = NULL;
1387       mumps->id.nz_rhs      = 0;
1388       mumps->id.rhs_sparse  = NULL;
1389     }
1390   }
1391 
1392   /* solve phase */
1393   /*-------------*/
1394   mumps->id.job = JOB_SOLVE;
1395   PetscMUMPS_c(mumps);
1396   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1397 
1398   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1399   PetscCall(MatDenseGetArray(X, &array));
1400   PetscCall(VecPlaceArray(v_mpi, array));
1401 
1402   /* create scatter scat_sol */
1403   PetscCall(MatGetOwnershipRanges(X, &rstart));
1404   /* iidx: index for scattering the mumps solution to petsc X */
1405 
1406   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1407   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1408   for (i = 0; i < lsol_loc; i++) {
1409     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1410 
1411     for (proc = 0; proc < mumps->petsc_size; proc++) {
1412       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1413         myrstart = rstart[proc];
1414         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1415         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1416         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1417         break;
1418       }
1419     }
1420 
1421     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1422   }
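  /* Example (illustration only): two ranks owning rows {0,1} and {2,3} of X, nrhs=2. A MUMPS entry for global
     row 3 (owned by rank 1, rstart = 2, m = 2) maps to iidx = (3-2) + 2*nrhs = 5 for the first RHS and
     5 + m = 7 for the second, i.e. the column-major positions inside rank 1's local block of v_mpi/X. */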
1423   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1424   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1425   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1426   PetscCall(ISDestroy(&is_from));
1427   PetscCall(ISDestroy(&is_to));
1428   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1429   PetscCall(MatDenseRestoreArray(X, &array));
1430 
1431   /* free spaces */
1432   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1433   mumps->id.isol_loc = isol_loc_save;
1434 
1435   PetscCall(PetscFree2(sol_loc, isol_loc));
1436   PetscCall(PetscFree(idxx));
1437   PetscCall(VecDestroy(&msol_loc));
1438   PetscCall(VecDestroy(&v_mpi));
1439   if (!denseB) {
1440     if (!mumps->myid) {
1441       b = (Mat_MPIAIJ *)Bt->data;
1442       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1443       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1444       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1445     }
1446   } else {
1447     if (mumps->ICNTL20 == 0) {
1448       PetscCall(VecDestroy(&b_seq));
1449       PetscCall(VecScatterDestroy(&scat_rhs));
1450     }
1451   }
1452   PetscCall(VecScatterDestroy(&scat_sol));
1453   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1454   PetscFunctionReturn(0);
1455 }
1456 
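/* MUMPS ICNTL(9) selects the system solved in the solve phase: 1 (the default) solves A x = b, any other
   value solves A^T x = b. The transpose solve below simply toggles it around MatMatSolve_MUMPS(). */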
1457 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) {
1458   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1459   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1460 
1461   PetscFunctionBegin;
1462   mumps->id.ICNTL(9) = 0;
1463   PetscCall(MatMatSolve_MUMPS(A, B, X));
1464   mumps->id.ICNTL(9) = oldvalue;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) {
1469   PetscBool flg;
1470   Mat       B;
1471 
1472   PetscFunctionBegin;
1473   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1474   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1475 
1476   /* Create B=Bt^T that uses Bt's data structure */
1477   PetscCall(MatCreateTranspose(Bt, &B));
1478 
1479   PetscCall(MatMatSolve_MUMPS(A, B, X));
1480   PetscCall(MatDestroy(&B));
1481   PetscFunctionReturn(0);
1482 }
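/* Typical usage from application code (sketch; rperm, cperm and info are placeholder names, error checks omitted):
     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
     MatLUFactorSymbolic(F, A, rperm, cperm, &info);
     MatLUFactorNumeric(F, A, &info);
     MatMatTransposeSolve(F, Bt, X);       where Bt is a MATAIJ matrix holding the right-hand sides by rows
*/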
1483 
1484 #if !defined(PETSC_USE_COMPLEX)
1485 /*
1486   input:
1487    F:        numeric factor
1488   output:
1489    nneg:     total number of negative pivots
1490    nzero:    total number of zero pivots
1491    npos:     (global dimension of F) - nneg - nzero
1492 */
1493 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) {
1494   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1495   PetscMPIInt size;
1496 
1497   PetscFunctionBegin;
1498   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1499   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1500   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1501 
1502   if (nneg) *nneg = mumps->id.INFOG(12);
1503   if (nzero || npos) {
1504     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1505     if (nzero) *nzero = mumps->id.INFOG(28);
1506     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1507   }
1508   PetscFunctionReturn(0);
1509 }
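/* Typical usage (sketch): factor A with Cholesky via MATSOLVERMUMPS, run with
     -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1
   and query the factored matrix with MatGetInertia(F, &nneg, &nzero, &npos). */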
1510 #endif
1511 
1512 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) {
1513   PetscInt       i, nreqs;
1514   PetscMUMPSInt *irn, *jcn;
1515   PetscMPIInt    count;
1516   PetscInt64     totnnz, remain;
1517   const PetscInt osize = mumps->omp_comm_size;
1518   PetscScalar   *val;
1519 
1520   PetscFunctionBegin;
1521   if (osize > 1) {
1522     if (reuse == MAT_INITIAL_MATRIX) {
1523       /* master first gathers counts of nonzeros to receive */
1524       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1525       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1526 
1527       /* Then each computes number of send/recvs */
1528       if (mumps->is_omp_master) {
1529         /* Start from 1 since self communication is not done in MPI */
1530         nreqs = 0;
1531         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1532       } else {
1533         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1534       }
1535       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1536 
1537       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1538          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1539          may exceed 2^31-1 (and need not split evenly into smaller chunks), we have to slice the messages.
1540          Since omp_comm_size is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1541        */
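      /* For example (illustration only): a rank holding nnz = 3*2^30+1 entries sends its irn/jcn/val in two
         slices each: one of PETSC_MPI_INT_MAX entries and one carrying the remainder. */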
1542       nreqs = 0; /* counter for actual send/recvs */
1543       if (mumps->is_omp_master) {
1544         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1545         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1546         PetscCall(PetscMalloc1(totnnz, &val));
1547 
1548         /* Self communication */
1549         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1550         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1551         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1552 
1553         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1554         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1555         PetscCall(PetscFree(mumps->val_alloc));
1556         mumps->nnz = totnnz;
1557         mumps->irn = irn;
1558         mumps->jcn = jcn;
1559         mumps->val = mumps->val_alloc = val;
1560 
1561         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1562         jcn += mumps->recvcount[0];
1563         val += mumps->recvcount[0];
1564 
1565         /* Remote communication */
1566         for (i = 1; i < osize; i++) {
1567           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1568           remain = mumps->recvcount[i] - count;
1569           while (count > 0) {
1570             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1571             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1572             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1573             irn += count;
1574             jcn += count;
1575             val += count;
1576             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1577             remain -= count;
1578           }
1579         }
1580       } else {
1581         irn    = mumps->irn;
1582         jcn    = mumps->jcn;
1583         val    = mumps->val;
1584         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1585         remain = mumps->nnz - count;
1586         while (count > 0) {
1587           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1588           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1589           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1590           irn += count;
1591           jcn += count;
1592           val += count;
1593           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1594           remain -= count;
1595         }
1596       }
1597     } else {
1598       nreqs = 0;
1599       if (mumps->is_omp_master) {
1600         val = mumps->val + mumps->recvcount[0];
1601         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1602           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1603           remain = mumps->recvcount[i] - count;
1604           while (count > 0) {
1605             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1606             val += count;
1607             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1608             remain -= count;
1609           }
1610         }
1611       } else {
1612         val    = mumps->val;
1613         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1614         remain = mumps->nnz - count;
1615         while (count > 0) {
1616           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1617           val += count;
1618           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1619           remain -= count;
1620         }
1621       }
1622     }
1623     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1624     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info) {
1630   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1631   PetscBool  isMPIAIJ;
1632 
1633   PetscFunctionBegin;
1634   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1635     if (mumps->id.INFOG(1) == -6) { PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); }
1636     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1637     PetscFunctionReturn(0);
1638   }
1639 
1640   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1641   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1642 
1643   /* numerical factorization phase */
1644   /*-------------------------------*/
1645   mumps->id.job = JOB_FACTNUMERIC;
1646   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1647     if (!mumps->myid) { mumps->id.a = (MumpsScalar *)mumps->val; }
1648   } else {
1649     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1650   }
1651   PetscMUMPS_c(mumps);
1652   if (mumps->id.INFOG(1) < 0) {
1653     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1654     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1655       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1656       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1657     } else if (mumps->id.INFOG(1) == -13) {
1658       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1659       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1660     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1661       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1662       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1663     } else {
1664       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665       F->factorerrortype = MAT_FACTOR_OTHER;
1666     }
1667   }
1668   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1669 
1670   F->assembled = PETSC_TRUE;
1671 
1672   if (F->schur) { /* reset Schur status to unfactored */
1673 #if defined(PETSC_HAVE_CUDA)
1674     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1675 #endif
1676     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1677       mumps->id.ICNTL(19) = 2;
1678       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1679     }
1680     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1681   }
1682 
1683   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1684   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1685 
1686   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1687   if (mumps->petsc_size > 1) {
1688     PetscInt     lsol_loc;
1689     PetscScalar *sol_loc;
1690 
1691     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1692 
1693     /* distributed solution; Create x_seq=sol_loc for repeated use */
1694     if (mumps->x_seq) {
1695       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1696       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1697       PetscCall(VecDestroy(&mumps->x_seq));
1698     }
1699     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1700     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1701     mumps->id.lsol_loc = lsol_loc;
1702     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1703     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1704   }
1705   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 /* Sets MUMPS options from the options database */
1710 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) {
1711   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1712   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1713   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1714   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1715   MumpsScalar  *arr;
1716 
1717   PetscFunctionBegin;
1718   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1719   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1720     PetscInt nthreads   = 0;
1721     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1722     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1723 
1724     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1725     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1726     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1727 
1728     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1729     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1730     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1731     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1732     if (mumps->use_petsc_omp_support) {
1733       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1734                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1735       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1736 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1737       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1738       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1739 #endif
1740     } else {
1741       mumps->omp_comm      = PETSC_COMM_SELF;
1742       mumps->mumps_comm    = mumps->petsc_comm;
1743       mumps->is_omp_master = PETSC_TRUE;
1744     }
1745     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1746     mumps->reqs = NULL;
1747     mumps->tag  = 0;
1748 
1749     if (mumps->mumps_comm != MPI_COMM_NULL) {
1750       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1751         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1752         MPI_Comm comm;
1753         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1754         mumps->mumps_comm = comm;
1755       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1756     }
1757 
1758     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1759     mumps->id.job          = JOB_INIT;
1760     mumps->id.par          = 1; /* host participates in factorization and solve */
1761     mumps->id.sym          = mumps->sym;
1762 
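    /* JOB_INIT below (re)initializes the whole MUMPS structure, so any Schur settings made before this point
       (size_schur, schur, listvar_schur) are stashed here and restored right after the call. */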
1763     size          = mumps->id.size_schur;
1764     arr           = mumps->id.schur;
1765     listvar_schur = mumps->id.listvar_schur;
1766     PetscMUMPS_c(mumps);
1767     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1768     /* restore cached ICNTL and CNTL values */
1769     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1770     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1771     PetscCall(PetscFree(mumps->ICNTL_pre));
1772     PetscCall(PetscFree(mumps->CNTL_pre));
1773 
1774     if (schur) {
1775       mumps->id.size_schur    = size;
1776       mumps->id.schur_lld     = size;
1777       mumps->id.schur         = arr;
1778       mumps->id.listvar_schur = listvar_schur;
1779       if (mumps->petsc_size > 1) {
1780         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1781 
1782         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1783         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1784         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1785         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1786       } else {
1787         if (F->factortype == MAT_FACTOR_LU) {
1788           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1789         } else {
1790           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1791         }
1792       }
1793       mumps->id.ICNTL(26) = -1;
1794     }
1795 
1796     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1797        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1798      */
1799     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1800     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1801 
1802     mumps->scat_rhs = NULL;
1803     mumps->scat_sol = NULL;
1804 
1805     /* set PETSc-MUMPS default options - override MUMPS default */
1806     mumps->id.ICNTL(3) = 0;
1807     mumps->id.ICNTL(4) = 0;
1808     if (mumps->petsc_size == 1) {
1809       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1810       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1811     } else {
1812       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1813       mumps->id.ICNTL(21) = 1; /* distributed solution */
1814     }
1815   }
1816   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1817   if (flg) mumps->id.ICNTL(1) = icntl;
1818   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1819   if (flg) mumps->id.ICNTL(2) = icntl;
1820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1821   if (flg) mumps->id.ICNTL(3) = icntl;
1822 
1823   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1824   if (flg) mumps->id.ICNTL(4) = icntl;
1825   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1826 
1827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1828   if (flg) mumps->id.ICNTL(6) = icntl;
1829 
1830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1831   if (flg) {
1832     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1833     mumps->id.ICNTL(7) = icntl;
1834   }
1835 
1836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1837   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1838   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1840   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1841   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1842   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1843   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1844   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1845   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1846   if (flg) {
1847     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1848     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1849   }
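  /* For example (illustration only): a matrix from a 3-dof-per-node discretization with block size 3 gets
     ICNTL(15) = -3 above, telling MUMPS that the variables come in uniform blocks of 3 (see the MUMPS manual). */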
1850   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1851   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1852     PetscCall(MatDestroy(&F->schur));
1853     PetscCall(MatMumpsResetSchur_Private(mumps));
1854   }
1855 
1856   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1857      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1858      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1859      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1860      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1861      In short, we could not use distributed RHS with MPICH until v4.0b1.
1862    */
1863 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1864   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1865 #else
1866   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1867 #endif
1868   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1869   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1870 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1871   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1872 #endif
1873   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1874 
1875   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1876   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1877   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1878   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
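  /* Typical usage for null pivot detection (sketch): run with
       -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 <tol>
     and query the number of null pivots found through MatMumpsGetInfog(F, 28, &n28), n28 being a placeholder. */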
1879 
1880   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1881   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1882   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1883   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1884   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1885   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1886   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1887   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1888   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1889   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1890   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1891   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1892 
1893   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1894   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1895   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1896   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1897   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1898   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1899 
1900   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1901 
1902   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1903   if (ninfo) {
1904     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1905     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1906     mumps->ninfo = ninfo;
1907     for (i = 0; i < ninfo; i++) {
1908       PetscCheck(info[i] > 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1909       mumps->info[i] = info[i];
1910     }
1911   }
1912   PetscOptionsEnd();
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps) {
1917   PetscFunctionBegin;
1918   if (mumps->id.INFOG(1) < 0) {
1919     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1920     if (mumps->id.INFOG(1) == -6) {
1921       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1922       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1923     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1924       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1925       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1926     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1927       PetscCall(PetscInfo(F, "Empty matrix\n"));
1928     } else {
1929       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1930       F->factorerrortype = MAT_FACTOR_OTHER;
1931     }
1932   }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
1937   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1938   Vec            b;
1939   const PetscInt M = A->rmap->N;
1940 
1941   PetscFunctionBegin;
1942   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1943     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1944     PetscFunctionReturn(0);
1945   }
1946 
1947   /* Set MUMPS options from the options database */
1948   PetscCall(MatSetFromOptions_MUMPS(F, A));
1949 
1950   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1951   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1952 
1953   /* analysis phase */
1954   /*----------------*/
1955   mumps->id.job = JOB_FACTSYMBOLIC;
1956   mumps->id.n   = M;
1957   switch (mumps->id.ICNTL(18)) {
1958   case 0: /* centralized assembled matrix input */
1959     if (!mumps->myid) {
1960       mumps->id.nnz = mumps->nnz;
1961       mumps->id.irn = mumps->irn;
1962       mumps->id.jcn = mumps->jcn;
1963       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1964       if (r) {
1965         mumps->id.ICNTL(7) = 1;
1966         if (!mumps->myid) {
1967           const PetscInt *idx;
1968           PetscInt        i;
1969 
1970           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
1971           PetscCall(ISGetIndices(r, &idx));
1972           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1973           PetscCall(ISRestoreIndices(r, &idx));
1974         }
1975       }
1976     }
1977     break;
1978   case 3: /* distributed assembled matrix input (size>1) */
1979     mumps->id.nnz_loc = mumps->nnz;
1980     mumps->id.irn_loc = mumps->irn;
1981     mumps->id.jcn_loc = mumps->jcn;
1982     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
1983     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1984       PetscCall(MatCreateVecs(A, NULL, &b));
1985       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
1986       PetscCall(VecDestroy(&b));
1987     }
1988     break;
1989   }
1990   PetscMUMPS_c(mumps);
1991   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
1992 
1993   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
1994   F->ops->solve             = MatSolve_MUMPS;
1995   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
1996   F->ops->matsolve          = MatMatSolve_MUMPS;
1997   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1998   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
1999 
2000   mumps->matstruc = SAME_NONZERO_PATTERN;
2001   PetscFunctionReturn(0);
2002 }
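/* Typical command-line usage of this LU interface (sketch):
     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50
   where the last option raises the estimated working space increase to 50 percent. */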
2003 
2004 /* Note the Petsc r and c permutations are ignored */
2005 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
2006   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2007   Vec            b;
2008   const PetscInt M = A->rmap->N;
2009 
2010   PetscFunctionBegin;
2011   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2012     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2013     PetscFunctionReturn(0);
2014   }
2015 
2016   /* Set MUMPS options from the options database */
2017   PetscCall(MatSetFromOptions_MUMPS(F, A));
2018 
2019   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2020   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2021 
2022   /* analysis phase */
2023   /*----------------*/
2024   mumps->id.job = JOB_FACTSYMBOLIC;
2025   mumps->id.n   = M;
2026   switch (mumps->id.ICNTL(18)) {
2027   case 0: /* centralized assembled matrix input */
2028     if (!mumps->myid) {
2029       mumps->id.nnz = mumps->nnz;
2030       mumps->id.irn = mumps->irn;
2031       mumps->id.jcn = mumps->jcn;
2032       if (mumps->id.ICNTL(6) > 1) { mumps->id.a = (MumpsScalar *)mumps->val; }
2033     }
2034     break;
2035   case 3: /* distributed assembled matrix input (size>1) */
2036     mumps->id.nnz_loc = mumps->nnz;
2037     mumps->id.irn_loc = mumps->irn;
2038     mumps->id.jcn_loc = mumps->jcn;
2039     if (mumps->id.ICNTL(6) > 1) { mumps->id.a_loc = (MumpsScalar *)mumps->val; }
2040     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2041       PetscCall(MatCreateVecs(A, NULL, &b));
2042       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2043       PetscCall(VecDestroy(&b));
2044     }
2045     break;
2046   }
2047   PetscMUMPS_c(mumps);
2048   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2049 
2050   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2051   F->ops->solve             = MatSolve_MUMPS;
2052   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2053   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2054 
2055   mumps->matstruc = SAME_NONZERO_PATTERN;
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 /* Note the Petsc r permutation and factor info are ignored */
2060 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info) {
2061   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2062   Vec            b;
2063   const PetscInt M = A->rmap->N;
2064 
2065   PetscFunctionBegin;
2066   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2067     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2068     PetscFunctionReturn(0);
2069   }
2070 
2071   /* Set MUMPS options from the options database */
2072   PetscCall(MatSetFromOptions_MUMPS(F, A));
2073 
2074   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2075   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2076 
2077   /* analysis phase */
2078   /*----------------*/
2079   mumps->id.job = JOB_FACTSYMBOLIC;
2080   mumps->id.n   = M;
2081   switch (mumps->id.ICNTL(18)) {
2082   case 0: /* centralized assembled matrix input */
2083     if (!mumps->myid) {
2084       mumps->id.nnz = mumps->nnz;
2085       mumps->id.irn = mumps->irn;
2086       mumps->id.jcn = mumps->jcn;
2087       if (mumps->id.ICNTL(6) > 1) { mumps->id.a = (MumpsScalar *)mumps->val; }
2088     }
2089     break;
2090   case 3: /* distributed assembled matrix input (size>1) */
2091     mumps->id.nnz_loc = mumps->nnz;
2092     mumps->id.irn_loc = mumps->irn;
2093     mumps->id.jcn_loc = mumps->jcn;
2094     if (mumps->id.ICNTL(6) > 1) { mumps->id.a_loc = (MumpsScalar *)mumps->val; }
2095     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2096       PetscCall(MatCreateVecs(A, NULL, &b));
2097       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2098       PetscCall(VecDestroy(&b));
2099     }
2100     break;
2101   }
2102   PetscMUMPS_c(mumps);
2103   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2104 
2105   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2106   F->ops->solve                 = MatSolve_MUMPS;
2107   F->ops->solvetranspose        = MatSolve_MUMPS;
2108   F->ops->matsolve              = MatMatSolve_MUMPS;
2109   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2110   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2111 #if defined(PETSC_USE_COMPLEX)
2112   F->ops->getinertia = NULL;
2113 #else
2114   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2115 #endif
2116 
2117   mumps->matstruc = SAME_NONZERO_PATTERN;
2118   PetscFunctionReturn(0);
2119 }
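/* Typical command-line usage of the Cholesky interface (sketch):
     -pc_type cholesky -pc_factor_mat_solver_type mumps
   optionally combined with the -mat_mumps_icntl_* / -mat_mumps_cntl_* options registered above. */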
2120 
2121 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) {
2122   PetscBool         iascii;
2123   PetscViewerFormat format;
2124   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2125 
2126   PetscFunctionBegin;
2127   /* check if matrix is mumps type */
2128   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2129 
2130   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2131   if (iascii) {
2132     PetscCall(PetscViewerGetFormat(viewer, &format));
2133     if (format == PETSC_VIEWER_ASCII_INFO) {
2134       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2135       PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2136       PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2137       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2138       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2139       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2140       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2141       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2143       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2144       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):        %d\n", mumps->id.ICNTL(8)));
2145       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2146       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2147       if (mumps->id.ICNTL(11) > 0) {
2148         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2149         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2150         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2151         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2152         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g \n", mumps->id.RINFOG(9)));
2153         PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2154       }
2155       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2156       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2157       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2158       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2159       /* ICNTL(15-17) not used */
2160       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2161       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2162       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2163       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2164       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2166 
2167       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2169       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2170       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2173 
2174       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors can be discarded in the solve phase):%d\n", mumps->id.ICNTL(31)));
2176       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2177       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2180 
2181       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g \n", mumps->id.CNTL(1)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g \n", mumps->id.CNTL(2)));
2183       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g \n", mumps->id.CNTL(3)));
2184       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g \n", mumps->id.CNTL(4)));
2185       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g \n", mumps->id.CNTL(5)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g \n", mumps->id.CNTL(7)));
2187 
2188       /* information local to each processor */
2189       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2190       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2191       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g \n", mumps->myid, mumps->id.RINFO(1)));
2192       PetscCall(PetscViewerFlush(viewer));
2193       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2194       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d]  %g \n", mumps->myid, mumps->id.RINFO(2)));
2195       PetscCall(PetscViewerFlush(viewer));
2196       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2197       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d]  %g \n", mumps->myid, mumps->id.RINFO(3)));
2198       PetscCall(PetscViewerFlush(viewer));
2199 
2200       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2201       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2202       PetscCall(PetscViewerFlush(viewer));
2203 
2204       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2205       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2206       PetscCall(PetscViewerFlush(viewer));
2207 
2208       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2209       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2210       PetscCall(PetscViewerFlush(viewer));
2211 
2212       if (mumps->ninfo && mumps->ninfo <= 80) {
2213         PetscInt i;
2214         for (i = 0; i < mumps->ninfo; i++) {
2215           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n", mumps->info[i]));
2216           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2217           PetscCall(PetscViewerFlush(viewer));
2218         }
2219       }
2220       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2221 
2222       if (!mumps->myid) { /* information from the host */
2223         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n", mumps->id.RINFOG(1)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n", mumps->id.RINFOG(2)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n", mumps->id.RINFOG(3)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2227 
2228         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2230         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2231         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2262       }
2263     }
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info) {
2269   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2270 
2271   PetscFunctionBegin;
2272   info->block_size        = 1.0;
2273   info->nz_allocated      = mumps->id.INFOG(20);
2274   info->nz_used           = mumps->id.INFOG(20);
2275   info->nz_unneeded       = 0.0;
2276   info->assemblies        = 0.0;
2277   info->mallocs           = 0.0;
2278   info->memory            = 0.0;
2279   info->fill_ratio_given  = 0;
2280   info->fill_ratio_needed = 0;
2281   info->factor_mallocs    = 0;
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 /* -------------------------------------------------------------------------------------------*/
2286 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) {
2287   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2288   const PetscScalar *arr;
2289   const PetscInt    *idxs;
2290   PetscInt           size, i;
2291 
2292   PetscFunctionBegin;
2293   PetscCall(ISGetLocalSize(is, &size));
2294   /* Schur complement matrix */
2295   PetscCall(MatDestroy(&F->schur));
2296   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2297   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2298   mumps->id.schur      = (MumpsScalar *)arr;
2299   mumps->id.size_schur = size;
2300   mumps->id.schur_lld  = size;
2301   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2302   if (mumps->sym == 1) { PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); }
2303 
2304   /* MUMPS expects Fortran style indices */
2305   PetscCall(PetscFree(mumps->id.listvar_schur));
2306   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2307   PetscCall(ISGetIndices(is, &idxs));
2308   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2309   PetscCall(ISRestoreIndices(is, &idxs));
2310   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2311   mumps->id.ICNTL(26) = -1;
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 /* -------------------------------------------------------------------------------------------*/
2316 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) {
2317   Mat          St;
2318   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2319   PetscScalar *array;
2320 #if defined(PETSC_USE_COMPLEX)
2321   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2322 #endif
2323 
2324   PetscFunctionBegin;
2325   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2326   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2327   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2328   PetscCall(MatSetType(St, MATDENSE));
2329   PetscCall(MatSetUp(St));
2330   PetscCall(MatDenseGetArray(St, &array));
2331   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2332     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2333       PetscInt i, j, N = mumps->id.size_schur;
2334       for (i = 0; i < N; i++) {
2335         for (j = 0; j < N; j++) {
2336 #if !defined(PETSC_USE_COMPLEX)
2337           PetscScalar val = mumps->id.schur[i * N + j];
2338 #else
2339           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2340 #endif
2341           array[j * N + i] = val;
2342         }
2343       }
2344     } else { /* stored by columns */
2345       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2346     }
2347   } else {                          /* either full or lower-triangular (not packed) */
2348     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2349       PetscInt i, j, N = mumps->id.size_schur;
2350       for (i = 0; i < N; i++) {
2351         for (j = i; j < N; j++) {
2352 #if !defined(PETSC_USE_COMPLEX)
2353           PetscScalar val = mumps->id.schur[i * N + j];
2354 #else
2355           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2356 #endif
2357           array[i * N + j] = val;
2358           array[j * N + i] = val;
2359         }
2360       }
2361     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2362       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2363     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2364       PetscInt i, j, N = mumps->id.size_schur;
2365       for (i = 0; i < N; i++) {
2366         for (j = 0; j < i + 1; j++) {
2367 #if !defined(PETSC_USE_COMPLEX)
2368           PetscScalar val = mumps->id.schur[i * N + j];
2369 #else
2370           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2371 #endif
2372           array[i * N + j] = val;
2373           array[j * N + i] = val;
2374         }
2375       }
2376     }
2377   }
2378   PetscCall(MatDenseRestoreArray(St, &array));
2379   *S = St;
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 /* -------------------------------------------------------------------------------------------*/
2384 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) {
2385   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2386 
2387   PetscFunctionBegin;
2388   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2389     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2390     for (i = 0; i < nICNTL_pre; ++i)
2391       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2392     if (i == nICNTL_pre) {                             /* not already cached */
2393       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2394       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2395       mumps->ICNTL_pre[0]++;
2396     }
2397     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2398     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2399   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) {
2404   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2405 
2406   PetscFunctionBegin;
2407   *ival = mumps->id.ICNTL(icntl);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 /*@
2412   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2413 
2414    Logically Collective on Mat
2415 
2416    Input Parameters:
2417 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2418 .  icntl - index of MUMPS parameter array ICNTL()
2419 -  ival - value of MUMPS ICNTL(icntl)
2420 
2421   Options Database:
2422 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2423 
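  Example Usage:
  A minimal sketch (the ksp, pc and F handles are placeholders for objects created elsewhere, and the operators
  are assumed to have already been set on the ksp); PCFactorSetUpMatSolverType() creates the factor matrix so it
  can be queried before the solve, and ICNTL(14) is then set to a 30 percent workspace increase:
$     KSPGetPC(ksp,&pc);
$     PCSetType(pc,PCLU);
$     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
$     PCFactorSetUpMatSolverType(pc);
$     PCFactorGetMatrix(pc,&F);
$     MatMumpsSetIcntl(F,14,30);
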
2424    Level: beginner
2425 
2426    References:
2427 .  * - MUMPS Users' Guide
2428 
2429 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2430 @*/
2431 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) {
2432   PetscFunctionBegin;
2433   PetscValidType(F, 1);
2434   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2435   PetscValidLogicalCollectiveInt(F, icntl, 2);
2436   PetscValidLogicalCollectiveInt(F, ival, 3);
2437   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2438   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 /*@
2443   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2444 
2445    Logically Collective on Mat
2446 
2447    Input Parameters:
2448 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2449 -  icntl - index of MUMPS parameter array ICNTL()
2450 
2451   Output Parameter:
2452 .  ival - value of MUMPS ICNTL(icntl)
2453 
2454    Level: beginner
2455 
2456    References:
2457 .  * - MUMPS Users' Guide
2458 
2459 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2460 @*/
2461 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) {
2462   PetscFunctionBegin;
2463   PetscValidType(F, 1);
2464   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2465   PetscValidLogicalCollectiveInt(F, icntl, 2);
2466   PetscValidIntPointer(ival, 3);
2467   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2468   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 /* -------------------------------------------------------------------------------------------*/
2473 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) {
2474   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2475 
2476   PetscFunctionBegin;
2477   if (mumps->id.job == JOB_NULL) {
2478     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2479     for (i = 0; i < nCNTL_pre; ++i)
2480       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2481     if (i == nCNTL_pre) {
2482       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2483       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2484       mumps->CNTL_pre[0]++;
2485     }
2486     mumps->CNTL_pre[1 + 2 * i] = icntl;
2487     mumps->CNTL_pre[2 + 2 * i] = val;
2488   } else mumps->id.CNTL(icntl) = val;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) {
2493   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2494 
2495   PetscFunctionBegin;
2496   *val = mumps->id.CNTL(icntl);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 /*@
2501   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2502 
2503    Logically Collective on Mat
2504 
2505    Input Parameters:
2506 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2507 .  icntl - index of MUMPS parameter array CNTL()
2508 -  val - value of MUMPS CNTL(icntl)
2509 
2510   Options Database:
2511 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2512 
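  Example Usage:
  A minimal sketch (F is a placeholder for a factor matrix obtained as in MatMumpsSetIcntl(), e.g. via
  PCFactorGetMatrix()) that sets the relative pivoting threshold CNTL(1):
$     MatMumpsSetCntl(F,1,0.01);
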
2513    Level: beginner
2514 
2515    References:
2516 .  * - MUMPS Users' Guide
2517 
2518 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2519 @*/
2520 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) {
2521   PetscFunctionBegin;
2522   PetscValidType(F, 1);
2523   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2524   PetscValidLogicalCollectiveInt(F, icntl, 2);
2525   PetscValidLogicalCollectiveReal(F, val, 3);
2526   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2527   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /*@
2532   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2533 
2534    Logically Collective on Mat
2535 
2536    Input Parameters:
2537 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2538 -  icntl - index of MUMPS parameter array CNTL()
2539 
2540   Output Parameter:
2541 .  val - value of MUMPS CNTL(icntl)
2542 
2543    Level: beginner
2544 
2545    References:
2546 .  * - MUMPS Users' Guide
2547 
2548 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2549 @*/
2550 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) {
2551   PetscFunctionBegin;
2552   PetscValidType(F, 1);
2553   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2554   PetscValidLogicalCollectiveInt(F, icntl, 2);
2555   PetscValidRealPointer(val, 3);
2556   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2557   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) {
2562   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2563 
2564   PetscFunctionBegin;
2565   *info = mumps->id.INFO(icntl);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) {
2570   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2571 
2572   PetscFunctionBegin;
2573   *infog = mumps->id.INFOG(icntl);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) {
2578   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2579 
2580   PetscFunctionBegin;
2581   *rinfo = mumps->id.RINFO(icntl);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) {
2586   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2587 
2588   PetscFunctionBegin;
2589   *rinfog = mumps->id.RINFOG(icntl);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) {
2594   Mat          Bt = NULL, Btseq = NULL;
2595   PetscBool    flg;
2596   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2597   PetscScalar *aa;
2598   PetscInt     spnr, *ia, *ja, M, nrhs;
2599 
2600   PetscFunctionBegin;
2601   PetscValidPointer(spRHS, 2);
2602   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEMAT, &flg));
2603   if (flg) {
2604     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2605   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEMAT");
2606 
2607   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2608 
2609   if (mumps->petsc_size > 1) {
2610     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2611     Btseq         = b->A;
2612   } else {
2613     Btseq = Bt;
2614   }
2615 
2616   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2617   mumps->id.nrhs = nrhs;
2618   mumps->id.lrhs = M;
2619   mumps->id.rhs  = NULL;
2620 
2621   if (!mumps->myid) {
2622     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2623     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2624     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2625     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2626     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2627   } else {
2628     mumps->id.irhs_ptr    = NULL;
2629     mumps->id.irhs_sparse = NULL;
2630     mumps->id.nz_rhs      = 0;
2631     mumps->id.rhs_sparse  = NULL;
2632   }
2633   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2634   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2635 
2636   /* solve phase */
2637   /*-------------*/
2638   mumps->id.job = JOB_SOLVE;
2639   PetscMUMPS_c(mumps);
2640   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2641 
2642   if (!mumps->myid) {
2643     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2644     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2645     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2646   }
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 /*@
2651   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2652 
2653    Logically Collective on Mat
2654 
2655    Input Parameters:
2656 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2657 -  spRHS - sequential sparse matrix of type MATTRANSPOSEMAT holding the requested entry indices of inv(A), stored on process 0
2658 
2659   Output Parameter:
2660 . spRHS - requested entries of inverse of A
2661 
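  Example Usage:
  A minimal sketch, assuming Bt is a sequential MATAIJ matrix held on process 0 whose nonzero pattern marks the
  requested entries of inv(A), and F is the factored matrix; Bt is wrapped as a MATTRANSPOSEMAT before the call:
$     Mat spRHS;
$     MatCreateTranspose(Bt,&spRHS);
$     MatMumpsGetInverse(F,spRHS);
$     MatDestroy(&spRHS);
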
2662    Level: beginner
2663 
2664    References:
2665 .  * - MUMPS Users' Guide
2666 
2667 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2668 @*/
2669 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) {
2670   PetscFunctionBegin;
2671   PetscValidType(F, 1);
2672   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2673   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) {
2678   Mat spRHS;
2679 
2680   PetscFunctionBegin;
2681   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2682   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2683   PetscCall(MatDestroy(&spRHS));
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 /*@
2688   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2689 
2690    Logically Collective on Mat
2691 
2692    Input Parameters:
2693 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2694 -  spRHST - sequential sparse matrix of type MATAIJ holding the requested entry indices of the inverse of A^T, stored on process 0
2695 
2696   Output Parameter:
2697 . spRHST - requested entries of inverse of A^T
2698 
2699    Level: beginner
2700 
2701    References:
2702 .  * - MUMPS Users' Guide
2703 
2704 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2705 @*/
2706 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) {
2707   PetscBool flg;
2708 
2709   PetscFunctionBegin;
2710   PetscValidType(F, 1);
2711   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2712   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2713   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2714 
2715   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 /*@
2720   MatMumpsGetInfo - Get MUMPS parameter INFO()
2721 
2722    Logically Collective on Mat
2723 
2724    Input Parameters:
2725 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2726 -  icntl - index of MUMPS parameter array INFO()
2727 
2728   Output Parameter:
2729 .  ival - value of MUMPS INFO(icntl)
2730 
2731    Level: beginner
2732 
2733    References:
2734 .  * - MUMPS Users' Guide
2735 
2736 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2737 @*/
2738 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) {
2739   PetscFunctionBegin;
2740   PetscValidType(F, 1);
2741   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2742   PetscValidIntPointer(ival, 3);
2743   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 /*@
2748   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2749 
2750    Logically Collective on Mat
2751 
2752    Input Parameters:
2753 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2754 -  icntl - index of MUMPS parameter array INFOG()
2755 
2756   Output Parameter:
2757 .  ival - value of MUMPS INFOG(icntl)
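  Example Usage:
  A minimal sketch (F is a placeholder for the factored matrix) that retrieves the global MUMPS return code
  INFOG(1) after a factorization or solve; a negative value indicates an error:
$     PetscInt infog1;
$     MatMumpsGetInfog(F,1,&infog1);
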
2758 
2759    Level: beginner
2760 
2761    References:
2762 .  * - MUMPS Users' Guide
2763 
2764 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2765 @*/
2766 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) {
2767   PetscFunctionBegin;
2768   PetscValidType(F, 1);
2769   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2770   PetscValidIntPointer(ival, 3);
2771   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 /*@
2776   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2777 
2778    Logically Collective on Mat
2779 
2780    Input Parameters:
2781 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2782 -  icntl - index of MUMPS parameter array RINFO()
2783 
2784   Output Parameter:
2785 .  val - value of MUMPS RINFO(icntl)
2786 
2787    Level: beginner
2788 
2789    References:
2790 .  * - MUMPS Users' Guide
2791 
2792 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2793 @*/
2794 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) {
2795   PetscFunctionBegin;
2796   PetscValidType(F, 1);
2797   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2798   PetscValidRealPointer(val, 3);
2799   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 /*@
2804   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2805 
2806    Logically Collective on Mat
2807 
2808    Input Parameters:
2809 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2810 -  icntl - index of MUMPS parameter array RINFOG()
2811 
2812   Output Parameter:
2813 .  val - value of MUMPS RINFOG(icntl)
2814 
2815    Level: beginner
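  Example Usage:
  A minimal sketch (F is a placeholder for the factored matrix) that retrieves the determinant computed by MUMPS;
  this assumes ICNTL(33) was set before factorization (see -mat_mumps_icntl_33), and the determinant is
  (RINFOG(12) + i*RINFOG(13))*2^INFOG(34):
$     PetscReal rdet,idet;
$     PetscInt  e;
$     MatMumpsGetRinfog(F,12,&rdet);
$     MatMumpsGetRinfog(F,13,&idet);
$     MatMumpsGetInfog(F,34,&e);
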
2816 
2817    References:
2818 .  * - MUMPS Users' Guide
2819 
2820 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2821 @*/
2822 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) {
2823   PetscFunctionBegin;
2824   PetscValidType(F, 1);
2825   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2826   PetscValidRealPointer(val, 3);
2827   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 /*MC
2832   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2833   distributed and sequential matrices via the external package MUMPS.
2834 
2835   Works with MATAIJ and MATSBAIJ matrices
2836 
2837   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2838 
2839   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2840 
2841   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2842 
2843   Options Database Keys:
2844 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2845 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2846 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2847 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2848 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2849 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2850                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2851 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2852 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of steps of iterative refinement
2853 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2854 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2855 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2856 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2857 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2858 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2859 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2860 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2861 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2862 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2863 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2864 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2865 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2866 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2867 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2868 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2869 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2870 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2871 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2872 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2873 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2874 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2875 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2876 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2877 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2878 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2879 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2880                                    The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2881 
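  As an illustration (the executable name ./app is a placeholder), one might select MUMPS LU with the PORD
  ordering (ICNTL(7)=4) and a 30 percent workspace increase from the command line:
$     mpiexec -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 4 -mat_mumps_icntl_14 30
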
2882   Level: beginner
2883 
2884     Notes:
2885     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so attempting to use it on a Hermitian matrix will generate an error.
2886 
2887     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2888     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2889 
2890     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2891 $          KSPGetPC(ksp,&pc);
2892 $          PCFactorGetMatrix(pc,&mat);
2893 $          MatMumpsGetInfo(mat,....);
2894 $          MatMumpsGetInfog(mat,....); etc.
2895            Alternatively, run with -ksp_error_if_not_converged, which stops the program and prints the MUMPS information in the error message.
2896 
2897   Using MUMPS with 64-bit integers
2898     MUMPS provides 64-bit integer support in two build modes:
2899       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2900       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2901 
2902       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2903       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2904       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2905       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2906 
2907     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used with both --with-64-bit-indices=0/1 variants of PETSc.
2908 
2909   Two modes to run MUMPS/PETSc with OpenMP
2910 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2911 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2912 
2913 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2914 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2915 
2916    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2917    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2918    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2919    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2920    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2921 
2922    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2923    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2924    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2925    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2926    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2927    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose a compute node has two sockets and MPI ranks
2928    are interleaved across them (even ranks on socket 0, odd ranks on socket 1) with ranks bound to cores. Then, with -mat_mumps_use_omp_threads 16,
2929    a master rank (and the threads it spawns) uses half of the cores in socket 0 and half in socket 1, which hurts locality. If instead MPI ranks
2930    are mapped consecutively onto the two sockets, the problem does not occur. Therefore, when using -mat_mumps_use_omp_threads, keep an eye on
2931    your MPI rank mapping and CPU binding.
2932    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2933    examine the mapping result.
2934 
2935    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2936    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2937    calls omp_set_num_threads(m) internally before calling MUMPS.
2938 
2939    References:
2940 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2941 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2942 
2943 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2944 
2945 M*/
2946 
2947 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type) {
2948   PetscFunctionBegin;
2949   *type = MATSOLVERMUMPS;
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 /* MatGetFactor for Seq and MPI AIJ matrices */
2954 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) {
2955   Mat         B;
2956   Mat_MUMPS  *mumps;
2957   PetscBool   isSeqAIJ;
2958   PetscMPIInt size;
2959 
2960   PetscFunctionBegin;
2961 #if defined(PETSC_USE_COMPLEX)
2962   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
2963 #endif
2964   /* Create the factorization matrix */
2965   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
2966   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2967   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2968   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
2969   PetscCall(MatSetUp(B));
2970 
2971   PetscCall(PetscNewLog(B, &mumps));
2972 
2973   B->ops->view    = MatView_MUMPS;
2974   B->ops->getinfo = MatGetInfo_MUMPS;
2975 
2976   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
2977   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
2978   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
2979   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
2980   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
2981   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
2982   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
2983   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
2984   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
2985   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
2986   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
2987   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
2988   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
2989 
2990   if (ftype == MAT_FACTOR_LU) {
2991     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
2992     B->factortype            = MAT_FACTOR_LU;
2993     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
2994     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
2995     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
2996     mumps->sym = 0;
2997   } else {
2998     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
2999     B->factortype                  = MAT_FACTOR_CHOLESKY;
3000     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3001     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3002     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3003 #if defined(PETSC_USE_COMPLEX)
3004     mumps->sym = 2;
3005 #else
3006     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3007     else mumps->sym = 2;
3008 #endif
3009   }
3010 
3011   /* set solvertype */
3012   PetscCall(PetscFree(B->solvertype));
3013   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3014   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3015   if (size == 1) {
3016     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3017     B->canuseordering = PETSC_TRUE;
3018   }
3019   B->ops->destroy = MatDestroy_MUMPS;
3020   B->data         = (void *)mumps;
3021 
3022   *F               = B;
3023   mumps->id.job    = JOB_NULL;
3024   mumps->ICNTL_pre = NULL;
3025   mumps->CNTL_pre  = NULL;
3026   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3031 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F) {
3032   Mat         B;
3033   Mat_MUMPS  *mumps;
3034   PetscBool   isSeqSBAIJ;
3035   PetscMPIInt size;
3036 
3037   PetscFunctionBegin;
3038 #if defined(PETSC_USE_COMPLEX)
3039   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3040 #endif
3041   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3042   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3043   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3044   PetscCall(MatSetUp(B));
3045 
3046   PetscCall(PetscNewLog(B, &mumps));
3047   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3048   if (isSeqSBAIJ) {
3049     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3050   } else {
3051     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3052   }
3053 
3054   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3055   B->ops->view                   = MatView_MUMPS;
3056   B->ops->getinfo                = MatGetInfo_MUMPS;
3057 
3058   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3059   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3060   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3061   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3062   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3063   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3064   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3065   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3066   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3067   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3068   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3069   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3070   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3071 
3072   B->factortype = MAT_FACTOR_CHOLESKY;
3073 #if defined(PETSC_USE_COMPLEX)
3074   mumps->sym = 2;
3075 #else
3076   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3077   else mumps->sym = 2;
3078 #endif
3079 
3080   /* set solvertype */
3081   PetscCall(PetscFree(B->solvertype));
3082   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3083   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3084   if (size == 1) {
3085     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3086     B->canuseordering = PETSC_TRUE;
3087   }
3088   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3089   B->ops->destroy = MatDestroy_MUMPS;
3090   B->data         = (void *)mumps;
3091 
3092   *F               = B;
3093   mumps->id.job    = JOB_NULL;
3094   mumps->ICNTL_pre = NULL;
3095   mumps->CNTL_pre  = NULL;
3096   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) {
3101   Mat         B;
3102   Mat_MUMPS  *mumps;
3103   PetscBool   isSeqBAIJ;
3104   PetscMPIInt size;
3105 
3106   PetscFunctionBegin;
3107   /* Create the factorization matrix */
3108   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3109   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3110   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3111   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3112   PetscCall(MatSetUp(B));
3113 
3114   PetscCall(PetscNewLog(B, &mumps));
3115   if (ftype == MAT_FACTOR_LU) {
3116     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3117     B->factortype            = MAT_FACTOR_LU;
3118     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3119     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3120     mumps->sym = 0;
3121     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3122   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3123 
3124   B->ops->view    = MatView_MUMPS;
3125   B->ops->getinfo = MatGetInfo_MUMPS;
3126 
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3134   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3135   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3136   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3137   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3138   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3139   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3140 
3141   /* set solvertype */
3142   PetscCall(PetscFree(B->solvertype));
3143   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3144   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3145   if (size == 1) {
3146     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3147     B->canuseordering = PETSC_TRUE;
3148   }
3149   B->ops->destroy = MatDestroy_MUMPS;
3150   B->data         = (void *)mumps;
3151 
3152   *F               = B;
3153   mumps->id.job    = JOB_NULL;
3154   mumps->ICNTL_pre = NULL;
3155   mumps->CNTL_pre  = NULL;
3156   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 /* MatGetFactor for Seq and MPI SELL matrices */
3161 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) {
3162   Mat         B;
3163   Mat_MUMPS  *mumps;
3164   PetscBool   isSeqSELL;
3165   PetscMPIInt size;
3166 
3167   PetscFunctionBegin;
3168   /* Create the factorization matrix */
3169   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3170   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3171   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3172   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3173   PetscCall(MatSetUp(B));
3174 
3175   PetscCall(PetscNewLog(B, &mumps));
3176 
3177   B->ops->view    = MatView_MUMPS;
3178   B->ops->getinfo = MatGetInfo_MUMPS;
3179 
3180   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3181   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3182   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3183   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3184   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3185   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3186   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3191 
3192   if (ftype == MAT_FACTOR_LU) {
3193     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3194     B->factortype            = MAT_FACTOR_LU;
3195     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3196     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3197     mumps->sym = 0;
3198     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3199   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3200 
3201   /* set solvertype */
3202   PetscCall(PetscFree(B->solvertype));
3203   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3204   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3205   if (size == 1) {
3206     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3207     B->canuseordering = PETSC_TRUE;
3208   }
3209   B->ops->destroy = MatDestroy_MUMPS;
3210   B->data         = (void *)mumps;
3211 
3212   *F               = B;
3213   mumps->id.job    = JOB_NULL;
3214   mumps->ICNTL_pre = NULL;
3215   mumps->CNTL_pre  = NULL;
3216   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3217   PetscFunctionReturn(0);
3218 }
3219 
3220 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) {
3221   PetscFunctionBegin;
3222   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3223   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3224   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3225   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3226   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3227   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3228   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3229   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3230   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3231   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3232   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3233   PetscFunctionReturn(0);
3234 }
3235