xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 965f1a8a98153c626e4c8fc06eebd096d79fdff5)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12   #if defined(PETSC_USE_REAL_SINGLE)
13     #include <cmumps_c.h>
14   #else
15     #include <zmumps_c.h>
16   #endif
17 #else
18   #if defined(PETSC_USE_REAL_SINGLE)
19     #include <smumps_c.h>
20   #else
21     #include <dmumps_c.h>
22   #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34   #if defined(PETSC_USE_REAL_SINGLE)
35     #define MUMPS_c cmumps_c
36   #else
37     #define MUMPS_c zmumps_c
38   #endif
39 #else
40   #if defined(PETSC_USE_REAL_SINGLE)
41     #define MUMPS_c smumps_c
42   #else
43     #define MUMPS_c dmumps_c
44   #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48    the number of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention of PetscMPIInt, PetscBLASInt, etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore checking the macro is more reliable */
55     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro once used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(0);
76 }
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80 {
81   PetscInt  myval;
82   PetscBool myset;
83   PetscFunctionBegin;
84   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87   if (set) *set = myset;
88   PetscFunctionReturn(0);
89 }
90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
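/* A hedged usage sketch of the wrapper above (the option name and local variables are illustrative only;
   the macro must be used inside a PetscOptionsBegin()/PetscOptionsEnd() block, which defines PetscOptionsObject):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
     if (flg) mumps->id.ICNTL(4) = icntl;

   The value is read as a PetscInt and then range-checked and cast down to PetscMUMPSInt. */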
91 
92 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94   #define PetscMUMPS_c(mumps) \
95     do { \
96       if (mumps->use_petsc_omp_support) { \
97         if (mumps->is_omp_master) { \
98           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99           MUMPS_c(&mumps->id); \
100           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
101         } \
102         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
103         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
104          to each process, so we only Bcast info[1] (an error code) and leave the others alone (they do not  \
105          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.      \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.  \
107       */ \
108         PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
109         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
110         PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
111       } else { \
112         MUMPS_c(&mumps->id); \
113       } \
114     } while (0)
115 #else
116   #define PetscMUMPS_c(mumps) \
117     do { \
118       MUMPS_c(&mumps->id); \
119     } while (0)
120 #endif
121 
122 /* declare MumpsScalar */
123 #if defined(PETSC_USE_COMPLEX)
124   #if defined(PETSC_USE_REAL_SINGLE)
125     #define MumpsScalar mumps_complex
126   #else
127     #define MumpsScalar mumps_double_complex
128   #endif
129 #else
130   #define MumpsScalar PetscScalar
131 #endif
132 
133 /* macros s.t. indices match MUMPS documentation */
134 #define ICNTL(I)  icntl[(I)-1]
135 #define CNTL(I)   cntl[(I)-1]
136 #define INFOG(I)  infog[(I)-1]
137 #define INFO(I)   info[(I)-1]
138 #define RINFOG(I) rinfog[(I)-1]
139 #define RINFO(I)  rinfo[(I)-1]
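/* For illustration only (this comment adds no functionality): with the macros above, the 1-based
   numbering used in the MUMPS documentation maps onto the 0-based C arrays, e.g.

     mumps->id.ICNTL(7) = 2;    becomes    mumps->id.icntl[6] = 2;

   so control parameters can be set in this file with the same indices as in the MUMPS manual. */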
140 
141 typedef struct Mat_MUMPS Mat_MUMPS;
142 struct Mat_MUMPS {
143 #if defined(PETSC_USE_COMPLEX)
144   #if defined(PETSC_USE_REAL_SINGLE)
145   CMUMPS_STRUC_C id;
146   #else
147   ZMUMPS_STRUC_C id;
148   #endif
149 #else
150   #if defined(PETSC_USE_REAL_SINGLE)
151   SMUMPS_STRUC_C id;
152   #else
153   DMUMPS_STRUC_C id;
154   #endif
155 #endif
156 
157   MatStructure   matstruc;
158   PetscMPIInt    myid, petsc_size;
159   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
160   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
161   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
162   PetscMUMPSInt  sym;
163   MPI_Comm       mumps_comm;
164   PetscMUMPSInt *ICNTL_pre;
165   PetscReal     *CNTL_pre;
166   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
167   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
168   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
169   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
170 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
171   PetscInt    *rhs_nrow, max_nrhs;
172   PetscMPIInt *rhs_recvcounts, *rhs_disps;
173   PetscScalar *rhs_loc, *rhs_recvbuf;
174 #endif
175   Vec            b_seq, x_seq;
176   PetscInt       ninfo, *info; /* which INFO to display */
177   PetscInt       sizeredrhs;
178   PetscScalar   *schur_sol;
179   PetscInt       schur_sizesol;
180   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
181   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
182   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
183 
184   /* stuff used by petsc/mumps OpenMP support*/
185   PetscBool    use_petsc_omp_support;
186   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
187   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
188   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
189   PetscMPIInt  tag, omp_comm_size;
190   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
191   MPI_Request *reqs;
192 };
193 
194 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
195    Here, nrow is the number of rows, ia[] is the row pointer array, and ja[] holds the column indices.
196  */
197 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
198 {
199   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
200 
201   PetscFunctionBegin;
202 #if defined(PETSC_USE_64BIT_INDICES)
203   {
204     PetscInt i;
205     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
206       PetscCall(PetscFree(mumps->ia_alloc));
207       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
208       mumps->cur_ilen = nrow + 1;
209     }
210     if (nnz > mumps->cur_jlen) {
211       PetscCall(PetscFree(mumps->ja_alloc));
212       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
213       mumps->cur_jlen = nnz;
214     }
215     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
216     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
217     *ia_mumps = mumps->ia_alloc;
218     *ja_mumps = mumps->ja_alloc;
219   }
220 #else
221   *ia_mumps          = ia;
222   *ja_mumps          = ja;
223 #endif
224   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
225   PetscFunctionReturn(0);
226 }
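/* For reference, a hypothetical 2x3 matrix in the 1-based CSR form (nrow, ia, ja) that the routine above casts:
     B = [ 1 0 2 ]        nrow = 2
         [ 0 3 0 ]        ia   = {1, 3, 4}   (1-based row pointers, so ia[nrow]-1 == nnz == 3)
                          ja   = {1, 3, 2}   (1-based column indices)
   The routine copies/casts ia[] and ja[] into PetscMUMPSInt buffers only when PETSc uses 64-bit indices;
   otherwise it simply aliases the input arrays. */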
227 
228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
229 {
230   PetscFunctionBegin;
231   PetscCall(PetscFree(mumps->id.listvar_schur));
232   PetscCall(PetscFree(mumps->id.redrhs));
233   PetscCall(PetscFree(mumps->schur_sol));
234   mumps->id.size_schur = 0;
235   mumps->id.schur_lld  = 0;
236   mumps->id.ICNTL(19)  = 0;
237   PetscFunctionReturn(0);
238 }
239 
240 /* solve with rhs in mumps->id.redrhs and return in the same location */
241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
242 {
243   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
244   Mat                  S, B, X;
245   MatFactorSchurStatus schurstatus;
246   PetscInt             sizesol;
247 
248   PetscFunctionBegin;
249   PetscCall(MatFactorFactorizeSchurComplement(F));
250   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
251   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
252   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
254   PetscCall(MatBindToCPU(B, S->boundtocpu));
255 #endif
256   switch (schurstatus) {
257   case MAT_FACTOR_SCHUR_FACTORED:
258     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
259     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261     PetscCall(MatBindToCPU(X, S->boundtocpu));
262 #endif
263     if (!mumps->id.ICNTL(9)) { /* transpose solve */
264       PetscCall(MatMatSolveTranspose(S, B, X));
265     } else {
266       PetscCall(MatMatSolve(S, B, X));
267     }
268     break;
269   case MAT_FACTOR_SCHUR_INVERTED:
270     sizesol = mumps->id.nrhs * mumps->id.size_schur;
271     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
272       PetscCall(PetscFree(mumps->schur_sol));
273       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
274       mumps->schur_sizesol = sizesol;
275     }
276     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
277     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
279     PetscCall(MatBindToCPU(X, S->boundtocpu));
280 #endif
281     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
282     if (!mumps->id.ICNTL(9)) { /* transpose solve */
283       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
284     } else {
285       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
286     }
287     PetscCall(MatProductSetFromOptions(X));
288     PetscCall(MatProductSymbolic(X));
289     PetscCall(MatProductNumeric(X));
290 
291     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
292     break;
293   default:
294     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
295   }
296   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
297   PetscCall(MatDestroy(&B));
298   PetscCall(MatDestroy(&X));
299   PetscFunctionReturn(0);
300 }
301 
302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
303 {
304   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
305 
306   PetscFunctionBegin;
307   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
308     PetscFunctionReturn(0);
309   }
310   if (!expansion) { /* prepare for the condensation step */
311     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
312     /* allocate MUMPS internal array to store reduced right-hand sides */
313     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
314       PetscCall(PetscFree(mumps->id.redrhs));
315       mumps->id.lredrhs = mumps->id.size_schur;
316       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
317       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
318     }
319     mumps->id.ICNTL(26) = 1; /* condensation phase */
320   } else {                   /* prepare for the expansion step */
321     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
322     PetscCall(MatMumpsSolveSchur_Private(F));
323     mumps->id.ICNTL(26) = 2; /* expansion phase */
324     PetscMUMPS_c(mumps);
325     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
326     /* restore defaults */
327     mumps->id.ICNTL(26) = -1;
328     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
329     if (mumps->id.nrhs > 1) {
330       PetscCall(PetscFree(mumps->id.redrhs));
331       mumps->id.lredrhs = 0;
332       mumps->sizeredrhs = 0;
333     }
334   }
335   PetscFunctionReturn(0);
336 }
337 
338 /*
339   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
340 
341   input:
342     A       - matrix in aij,baij or sbaij format
343     shift   - 0: C style output triple; 1: Fortran style output triple.
344     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
345               MAT_REUSE_MATRIX:   only the values in v array are updated
346   output:
347     nnz     - dim of r, c, and v (number of local nonzero entries of A)
348     r, c, v - row and col index, matrix values (matrix triples)
349 
350   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
351 freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
352   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353 
354  */
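/* A minimal illustration with a hypothetical 2x2 AIJ matrix (not part of the interface): for
     A = [ 1 2 ]
         [ 0 3 ]
   the conversion with shift = 1 (Fortran-style indices, what MUMPS expects) yields
     r = {1, 1, 2},  c = {1, 2, 2},  v = {1.0, 2.0, 3.0}
   and shift = 0 yields the same triples with 0-based indices. */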
355 
356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
357 {
358   const PetscScalar *av;
359   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
360   PetscInt64         nz, rnz, i, j, k;
361   PetscMUMPSInt     *row, *col;
362   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatSeqAIJGetArrayRead(A, &av));
366   mumps->val = (PetscScalar *)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz = aa->nz;
369     ai = aa->i;
370     aj = aa->j;
371     PetscCall(PetscMalloc2(nz, &row, nz, &col));
372     for (i = k = 0; i < M; i++) {
373       rnz = ai[i + 1] - ai[i];
374       ajj = aj + ai[i];
375       for (j = 0; j < rnz; j++) {
376         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
377         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
390 {
391   PetscInt64     nz, i, j, k, r;
392   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
393   PetscMUMPSInt *row, *col;
394 
395   PetscFunctionBegin;
396   mumps->val = a->val;
397   if (reuse == MAT_INITIAL_MATRIX) {
398     nz = a->sliidx[a->totalslices];
399     PetscCall(PetscMalloc2(nz, &row, nz, &col));
400     for (i = k = 0; i < a->totalslices; i++) {
401       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
402     }
403     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
404     mumps->irn = row;
405     mumps->jcn = col;
406     mumps->nnz = nz;
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
412 {
413   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
414   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
415   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
416   PetscInt        bs;
417   PetscMUMPSInt  *row, *col;
418 
419   PetscFunctionBegin;
420   PetscCall(MatGetBlockSize(A, &bs));
421   M          = A->rmap->N / bs;
422   mumps->val = aa->a;
423   if (reuse == MAT_INITIAL_MATRIX) {
424     ai = aa->i;
425     aj = aa->j;
426     nz = bs2 * aa->nz;
427     PetscCall(PetscMalloc2(nz, &row, nz, &col));
428     for (i = 0; i < M; i++) {
429       ajj = aj + ai[i];
430       rnz = ai[i + 1] - ai[i];
431       for (k = 0; k < rnz; k++) {
432         for (j = 0; j < bs; j++) {
433           for (m = 0; m < bs; m++) {
434             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
435             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
436             idx++;
437           }
438         }
439       }
440     }
441     mumps->irn = row;
442     mumps->jcn = col;
443     mumps->nnz = nz;
444   }
445   PetscFunctionReturn(0);
446 }
447 
448 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
449 {
450   const PetscInt *ai, *aj, *ajj;
451   PetscInt        bs;
452   PetscInt64      nz, rnz, i, j, k, m;
453   PetscMUMPSInt  *row, *col;
454   PetscScalar    *val;
455   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
456   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
457 #if defined(PETSC_USE_COMPLEX)
458   PetscBool isset, hermitian;
459 #endif
460 
461   PetscFunctionBegin;
462 #if defined(PETSC_USE_COMPLEX)
463   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
464   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
465 #endif
466   ai = aa->i;
467   aj = aa->j;
468   PetscCall(MatGetBlockSize(A, &bs));
469   if (reuse == MAT_INITIAL_MATRIX) {
470     const PetscInt64 alloc_size = aa->nz * bs2;
471 
472     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
473     if (bs > 1) {
474       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
475       mumps->val = mumps->val_alloc;
476     } else {
477       mumps->val = aa->a;
478     }
479     mumps->irn = row;
480     mumps->jcn = col;
481   } else {
482     if (bs == 1) mumps->val = aa->a;
483     row = mumps->irn;
484     col = mumps->jcn;
485   }
486   val = mumps->val;
487 
488   nz = 0;
489   if (bs > 1) {
490     for (i = 0; i < mbs; i++) {
491       rnz = ai[i + 1] - ai[i];
492       ajj = aj + ai[i];
493       for (j = 0; j < rnz; j++) {
494         for (k = 0; k < bs; k++) {
495           for (m = 0; m < bs; m++) {
496             if (ajj[j] > i || k >= m) {
497               if (reuse == MAT_INITIAL_MATRIX) {
498                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
499                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
500               }
501               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
502             }
503           }
504         }
505       }
506     }
507   } else if (reuse == MAT_INITIAL_MATRIX) {
508     for (i = 0; i < mbs; i++) {
509       rnz = ai[i + 1] - ai[i];
510       ajj = aj + ai[i];
511       for (j = 0; j < rnz; j++) {
512         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
513         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
514         nz++;
515       }
516     }
517     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
518   }
519   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
524 {
525   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
526   PetscInt64         nz, rnz, i, j;
527   const PetscScalar *av, *v1;
528   PetscScalar       *val;
529   PetscMUMPSInt     *row, *col;
530   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
531   PetscBool          missing;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscBool hermitian, isset;
534 #endif
535 
536   PetscFunctionBegin;
537 #if defined(PETSC_USE_COMPLEX)
538   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
539   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
540 #endif
541   PetscCall(MatSeqAIJGetArrayRead(A, &av));
542   ai    = aa->i;
543   aj    = aa->j;
544   adiag = aa->diag;
545   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
546   if (reuse == MAT_INITIAL_MATRIX) {
547     /* count nz in the upper triangular part of A */
548     nz = 0;
549     if (missing) {
550       for (i = 0; i < M; i++) {
551         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
552           for (j = ai[i]; j < ai[i + 1]; j++) {
553             if (aj[j] < i) continue;
554             nz++;
555           }
556         } else {
557           nz += ai[i + 1] - adiag[i];
558         }
559       }
560     } else {
561       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
562     }
563     PetscCall(PetscMalloc2(nz, &row, nz, &col));
564     PetscCall(PetscMalloc1(nz, &val));
565     mumps->nnz = nz;
566     mumps->irn = row;
567     mumps->jcn = col;
568     mumps->val = mumps->val_alloc = val;
569 
570     nz = 0;
571     if (missing) {
572       for (i = 0; i < M; i++) {
573         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
574           for (j = ai[i]; j < ai[i + 1]; j++) {
575             if (aj[j] < i) continue;
576             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
577             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
578             val[nz] = av[j];
579             nz++;
580           }
581         } else {
582           rnz = ai[i + 1] - adiag[i];
583           ajj = aj + adiag[i];
584           v1  = av + adiag[i];
585           for (j = 0; j < rnz; j++) {
586             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
587             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
588             val[nz++] = v1[j];
589           }
590         }
591       }
592     } else {
593       for (i = 0; i < M; i++) {
594         rnz = ai[i + 1] - adiag[i];
595         ajj = aj + adiag[i];
596         v1  = av + adiag[i];
597         for (j = 0; j < rnz; j++) {
598           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
599           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
600           val[nz++] = v1[j];
601         }
602       }
603     }
604   } else {
605     nz  = 0;
606     val = mumps->val;
607     if (missing) {
608       for (i = 0; i < M; i++) {
609         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
610           for (j = ai[i]; j < ai[i + 1]; j++) {
611             if (aj[j] < i) continue;
612             val[nz++] = av[j];
613           }
614         } else {
615           rnz = ai[i + 1] - adiag[i];
616           v1  = av + adiag[i];
617           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
618         }
619       }
620     } else {
621       for (i = 0; i < M; i++) {
622         rnz = ai[i + 1] - adiag[i];
623         v1  = av + adiag[i];
624         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
625       }
626     }
627   }
628   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
629   PetscFunctionReturn(0);
630 }
631 
632 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
633 {
634   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
635   PetscInt           bs;
636   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
637   PetscMUMPSInt     *row, *col;
638   const PetscScalar *av, *bv, *v1, *v2;
639   PetscScalar       *val;
640   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
641   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
642   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
643   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
644 #if defined(PETSC_USE_COMPLEX)
645   PetscBool hermitian, isset;
646 #endif
647 
648   PetscFunctionBegin;
649 #if defined(PETSC_USE_COMPLEX)
650   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
651   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
652 #endif
653   PetscCall(MatGetBlockSize(A, &bs));
654   rstart = A->rmap->rstart;
655   ai     = aa->i;
656   aj     = aa->j;
657   bi     = bb->i;
658   bj     = bb->j;
659   av     = aa->a;
660   bv     = bb->a;
661 
662   garray = mat->garray;
663 
664   if (reuse == MAT_INITIAL_MATRIX) {
665     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
666     PetscCall(PetscMalloc2(nz, &row, nz, &col));
667     PetscCall(PetscMalloc1(nz, &val));
668     /* cannot determine the exact mumps->nnz yet; it depends on how many entries the SBAIJ loops below keep */
669     mumps->irn = row;
670     mumps->jcn = col;
671     mumps->val = mumps->val_alloc = val;
672   } else {
673     val = mumps->val;
674   }
675 
676   jj   = 0;
677   irow = rstart;
678   for (i = 0; i < mbs; i++) {
679     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
680     countA = ai[i + 1] - ai[i];
681     countB = bi[i + 1] - bi[i];
682     bjj    = bj + bi[i];
683     v1     = av + ai[i] * bs2;
684     v2     = bv + bi[i] * bs2;
685 
686     if (bs > 1) {
687       /* A-part */
688       for (j = 0; j < countA; j++) {
689         for (k = 0; k < bs; k++) {
690           for (m = 0; m < bs; m++) {
691             if (rstart + ajj[j] * bs > irow || k >= m) {
692               if (reuse == MAT_INITIAL_MATRIX) {
693                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
694                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
695               }
696               val[jj++] = v1[j * bs2 + m + k * bs];
697             }
698           }
699         }
700       }
701 
702       /* B-part */
703       for (j = 0; j < countB; j++) {
704         for (k = 0; k < bs; k++) {
705           for (m = 0; m < bs; m++) {
706             if (reuse == MAT_INITIAL_MATRIX) {
707               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
708               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
709             }
710             val[jj++] = v2[j * bs2 + m + k * bs];
711           }
712         }
713       }
714     } else {
715       /* A-part */
716       for (j = 0; j < countA; j++) {
717         if (reuse == MAT_INITIAL_MATRIX) {
718           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
719           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
720         }
721         val[jj++] = v1[j];
722       }
723 
724       /* B-part */
725       for (j = 0; j < countB; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
728           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
729         }
730         val[jj++] = v2[j];
731       }
732     }
733     irow += bs;
734   }
735   mumps->nnz = jj;
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
740 {
741   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
742   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
743   PetscMUMPSInt     *row, *col;
744   const PetscScalar *av, *bv, *v1, *v2;
745   PetscScalar       *val;
746   Mat                Ad, Ao;
747   Mat_SeqAIJ        *aa;
748   Mat_SeqAIJ        *bb;
749 
750   PetscFunctionBegin;
751   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
752   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
753   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
754 
755   aa = (Mat_SeqAIJ *)(Ad)->data;
756   bb = (Mat_SeqAIJ *)(Ao)->data;
757   ai = aa->i;
758   aj = aa->j;
759   bi = bb->i;
760   bj = bb->j;
761 
762   rstart = A->rmap->rstart;
763 
764   if (reuse == MAT_INITIAL_MATRIX) {
765     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
766     PetscCall(PetscMalloc2(nz, &row, nz, &col));
767     PetscCall(PetscMalloc1(nz, &val));
768     mumps->nnz = nz;
769     mumps->irn = row;
770     mumps->jcn = col;
771     mumps->val = mumps->val_alloc = val;
772   } else {
773     val = mumps->val;
774   }
775 
776   jj   = 0;
777   irow = rstart;
778   for (i = 0; i < m; i++) {
779     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
780     countA = ai[i + 1] - ai[i];
781     countB = bi[i + 1] - bi[i];
782     bjj    = bj + bi[i];
783     v1     = av + ai[i];
784     v2     = bv + bi[i];
785 
786     /* A-part */
787     for (j = 0; j < countA; j++) {
788       if (reuse == MAT_INITIAL_MATRIX) {
789         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
790         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
791       }
792       val[jj++] = v1[j];
793     }
794 
795     /* B-part */
796     for (j = 0; j < countB; j++) {
797       if (reuse == MAT_INITIAL_MATRIX) {
798         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
799         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
800       }
801       val[jj++] = v2[j];
802     }
803     irow++;
804   }
805   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
806   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
807   PetscFunctionReturn(0);
808 }
809 
810 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
811 {
812   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
813   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
814   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
815   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
816   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
817   const PetscInt     bs2 = mat->bs2;
818   PetscInt           bs;
819   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
820   PetscMUMPSInt     *row, *col;
821   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
822   PetscScalar       *val;
823 
824   PetscFunctionBegin;
825   PetscCall(MatGetBlockSize(A, &bs));
826   if (reuse == MAT_INITIAL_MATRIX) {
827     nz = bs2 * (aa->nz + bb->nz);
828     PetscCall(PetscMalloc2(nz, &row, nz, &col));
829     PetscCall(PetscMalloc1(nz, &val));
830     mumps->nnz = nz;
831     mumps->irn = row;
832     mumps->jcn = col;
833     mumps->val = mumps->val_alloc = val;
834   } else {
835     val = mumps->val;
836   }
837 
838   jj   = 0;
839   irow = rstart;
840   for (i = 0; i < mbs; i++) {
841     countA = ai[i + 1] - ai[i];
842     countB = bi[i + 1] - bi[i];
843     ajj    = aj + ai[i];
844     bjj    = bj + bi[i];
845     v1     = av + bs2 * ai[i];
846     v2     = bv + bs2 * bi[i];
847 
848     idx = 0;
849     /* A-part */
850     for (k = 0; k < countA; k++) {
851       for (j = 0; j < bs; j++) {
852         for (n = 0; n < bs; n++) {
853           if (reuse == MAT_INITIAL_MATRIX) {
854             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
855             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
856           }
857           val[jj++] = v1[idx++];
858         }
859       }
860     }
861 
862     idx = 0;
863     /* B-part */
864     for (k = 0; k < countB; k++) {
865       for (j = 0; j < bs; j++) {
866         for (n = 0; n < bs; n++) {
867           if (reuse == MAT_INITIAL_MATRIX) {
868             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
869             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
870           }
871           val[jj++] = v2[idx++];
872         }
873       }
874     }
875     irow += bs;
876   }
877   PetscFunctionReturn(0);
878 }
879 
880 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
881 {
882   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
883   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
884   PetscMUMPSInt     *row, *col;
885   const PetscScalar *av, *bv, *v1, *v2;
886   PetscScalar       *val;
887   Mat                Ad, Ao;
888   Mat_SeqAIJ        *aa;
889   Mat_SeqAIJ        *bb;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool hermitian, isset;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898 #endif
899   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
900   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
901   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
902 
903   aa    = (Mat_SeqAIJ *)(Ad)->data;
904   bb    = (Mat_SeqAIJ *)(Ao)->data;
905   ai    = aa->i;
906   aj    = aa->j;
907   adiag = aa->diag;
908   bi    = bb->i;
909   bj    = bb->j;
910 
911   rstart = A->rmap->rstart;
912 
913   if (reuse == MAT_INITIAL_MATRIX) {
914     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
915     nzb = 0; /* num of upper triangular entries in mat->B */
916     for (i = 0; i < m; i++) {
917       nza += (ai[i + 1] - adiag[i]);
918       countB = bi[i + 1] - bi[i];
919       bjj    = bj + bi[i];
920       for (j = 0; j < countB; j++) {
921         if (garray[bjj[j]] > rstart) nzb++;
922       }
923     }
924 
925     nz = nza + nzb; /* total nz of upper triangular part of mat */
926     PetscCall(PetscMalloc2(nz, &row, nz, &col));
927     PetscCall(PetscMalloc1(nz, &val));
928     mumps->nnz = nz;
929     mumps->irn = row;
930     mumps->jcn = col;
931     mumps->val = mumps->val_alloc = val;
932   } else {
933     val = mumps->val;
934   }
935 
936   jj   = 0;
937   irow = rstart;
938   for (i = 0; i < m; i++) {
939     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
940     v1     = av + adiag[i];
941     countA = ai[i + 1] - adiag[i];
942     countB = bi[i + 1] - bi[i];
943     bjj    = bj + bi[i];
944     v2     = bv + bi[i];
945 
946     /* A-part */
947     for (j = 0; j < countA; j++) {
948       if (reuse == MAT_INITIAL_MATRIX) {
949         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
950         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
951       }
952       val[jj++] = v1[j];
953     }
954 
955     /* B-part */
956     for (j = 0; j < countB; j++) {
957       if (garray[bjj[j]] > rstart) {
958         if (reuse == MAT_INITIAL_MATRIX) {
959           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
960           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
961         }
962         val[jj++] = v2[j];
963       }
964     }
965     irow++;
966   }
967   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
968   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatDestroy_MUMPS(Mat A)
973 {
974   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
975 
976   PetscFunctionBegin;
977   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
978   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
979   PetscCall(VecScatterDestroy(&mumps->scat_sol));
980   PetscCall(VecDestroy(&mumps->b_seq));
981   PetscCall(VecDestroy(&mumps->x_seq));
982   PetscCall(PetscFree(mumps->id.perm_in));
983   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
984   PetscCall(PetscFree(mumps->val_alloc));
985   PetscCall(PetscFree(mumps->info));
986   PetscCall(PetscFree(mumps->ICNTL_pre));
987   PetscCall(PetscFree(mumps->CNTL_pre));
988   PetscCall(MatMumpsResetSchur_Private(mumps));
989   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
990     mumps->id.job = JOB_END;
991     PetscMUMPS_c(mumps);
992     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
993     if (mumps->mumps_comm != MPI_COMM_NULL) {
994       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
995       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
996     }
997   }
998 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
999   if (mumps->use_petsc_omp_support) {
1000     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1001     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1002     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1003   }
1004 #endif
1005   PetscCall(PetscFree(mumps->ia_alloc));
1006   PetscCall(PetscFree(mumps->ja_alloc));
1007   PetscCall(PetscFree(mumps->recvcount));
1008   PetscCall(PetscFree(mumps->reqs));
1009   PetscCall(PetscFree(mumps->irhs_loc));
1010   PetscCall(PetscFree(A->data));
1011 
1012   /* clear composed functions */
1013   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1014   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1015   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1016   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1023   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1024   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1025   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1030 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1031 {
1032   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1033   const PetscMPIInt ompsize = mumps->omp_comm_size;
1034   PetscInt          i, m, M, rstart;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(MatGetSize(A, &M, NULL));
1038   PetscCall(MatGetLocalSize(A, &m, NULL));
1039   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1040   if (ompsize == 1) {
1041     if (!mumps->irhs_loc) {
1042       mumps->nloc_rhs = m;
1043       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1044       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1045       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1046     }
1047     mumps->id.rhs_loc = (MumpsScalar *)array;
1048   } else {
1049 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1050     const PetscInt *ranges;
1051     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1052     MPI_Group       petsc_group, omp_group;
1053     PetscScalar    *recvbuf = NULL;
1054 
1055     if (mumps->is_omp_master) {
1056       /* Lazily initialize the omp stuff for distributed rhs */
1057       if (!mumps->irhs_loc) {
1058         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1059         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1060         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1061         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1062         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1063         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1064 
1065         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1066         mumps->nloc_rhs = 0;
1067         PetscCall(MatGetOwnershipRanges(A, &ranges));
1068         for (j = 0; j < ompsize; j++) {
1069           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1070           mumps->nloc_rhs += mumps->rhs_nrow[j];
1071         }
1072         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1073         for (j = k = 0; j < ompsize; j++) {
1074           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1075         }
1076 
1077         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1078         PetscCallMPI(MPI_Group_free(&petsc_group));
1079         PetscCallMPI(MPI_Group_free(&omp_group));
1080       }
1081 
1082       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1083       if (nrhs > mumps->max_nrhs) {
1084         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1085         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1086         mumps->max_nrhs = nrhs;
1087       }
1088 
1089       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1090       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1091       mumps->rhs_disps[0] = 0;
1092       for (j = 1; j < ompsize; j++) {
1093         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1094         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1095       }
1096       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1097     }
1098 
1099     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1100     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1101 
1102     if (mumps->is_omp_master) {
1103       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1104         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1105         for (j = 0; j < ompsize; j++) {
1106           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1107           dst                    = dstbase;
1108           for (i = 0; i < nrhs; i++) {
1109             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1110             src += mumps->rhs_nrow[j];
1111             dst += mumps->nloc_rhs;
1112           }
1113           dstbase += mumps->rhs_nrow[j];
1114         }
1115       }
1116       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1117     }
1118 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1119   }
1120   mumps->id.nrhs     = nrhs;
1121   mumps->id.nloc_rhs = mumps->nloc_rhs;
1122   mumps->id.lrhs_loc = mumps->nloc_rhs;
1123   mumps->id.irhs_loc = mumps->irhs_loc;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1128 {
1129   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1130   const PetscScalar *rarray = NULL;
1131   PetscScalar       *array;
1132   IS                 is_iden, is_petsc;
1133   PetscInt           i;
1134   PetscBool          second_solve = PETSC_FALSE;
1135   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1136 
1137   PetscFunctionBegin;
1138   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1139                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1140                                    &cite1));
1141   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1142                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1143                                    &cite2));
1144 
1145   if (A->factorerrortype) {
1146     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1147     PetscCall(VecSetInf(x));
1148     PetscFunctionReturn(0);
1149   }
1150 
1151   mumps->id.nrhs = 1;
1152   if (mumps->petsc_size > 1) {
1153     if (mumps->ICNTL20 == 10) {
1154       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1155       PetscCall(VecGetArrayRead(b, &rarray));
1156       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1157     } else {
1158       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1159       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1160       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1161       if (!mumps->myid) {
1162         PetscCall(VecGetArray(mumps->b_seq, &array));
1163         mumps->id.rhs = (MumpsScalar *)array;
1164       }
1165     }
1166   } else {                   /* petsc_size == 1 */
1167     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1168     PetscCall(VecCopy(b, x));
1169     PetscCall(VecGetArray(x, &array));
1170     mumps->id.rhs = (MumpsScalar *)array;
1171   }
1172 
1173   /*
1174      handle condensation step of Schur complement (if any)
1175      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1176      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1177      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1178      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1179   */
1180   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1181     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1182     second_solve = PETSC_TRUE;
1183     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1184   }
1185   /* solve phase */
1186   /*-------------*/
1187   mumps->id.job = JOB_SOLVE;
1188   PetscMUMPS_c(mumps);
1189   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1190 
1191   /* handle expansion step of Schur complement (if any) */
1192   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1193 
1194   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1195     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1196       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1197       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1198     }
1199     if (!mumps->scat_sol) { /* create scatter scat_sol */
1200       PetscInt *isol2_loc = NULL;
1201       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1202       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1203       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1204       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1205       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1206       PetscCall(ISDestroy(&is_iden));
1207       PetscCall(ISDestroy(&is_petsc));
1208       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1209     }
1210 
1211     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1212     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1213   }
1214 
1215   if (mumps->petsc_size > 1) {
1216     if (mumps->ICNTL20 == 10) {
1217       PetscCall(VecRestoreArrayRead(b, &rarray));
1218     } else if (!mumps->myid) {
1219       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1220     }
1221   } else PetscCall(VecRestoreArray(x, &array));
1222 
1223   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1228 {
1229   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1230 
1231   PetscFunctionBegin;
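  /* per the MUMPS manual, ICNTL(9) = 1 solves A x = b while other values solve A^T x = b;
     temporarily switch it for the transpose solve and restore the default afterwards */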
1232   mumps->id.ICNTL(9) = 0;
1233   PetscCall(MatSolve_MUMPS(A, b, x));
1234   mumps->id.ICNTL(9) = 1;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1239 {
1240   Mat                Bt = NULL;
1241   PetscBool          denseX, denseB, flg, flgT;
1242   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1243   PetscInt           i, nrhs, M;
1244   PetscScalar       *array;
1245   const PetscScalar *rbray;
1246   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1247   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1248   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1249   IS                 is_to, is_from;
1250   PetscInt           k, proc, j, m, myrstart;
1251   const PetscInt    *rstart;
1252   Vec                v_mpi, msol_loc;
1253   VecScatter         scat_sol;
1254   Vec                b_seq;
1255   VecScatter         scat_rhs;
1256   PetscScalar       *aa;
1257   PetscInt           spnr, *ia, *ja;
1258   Mat_MPIAIJ        *b = NULL;
1259 
1260   PetscFunctionBegin;
1261   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1262   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1263 
1264   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1265   if (denseB) {
1266     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1267     mumps->id.ICNTL(20) = 0; /* dense RHS */
1268   } else {                   /* sparse B */
1269     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1270     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1271     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1272                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1273       PetscCall(MatTransposeGetMat(B, &Bt));
1274     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1275     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1276   }
1277 
1278   PetscCall(MatGetSize(B, &M, &nrhs));
1279   mumps->id.nrhs = nrhs;
1280   mumps->id.lrhs = M;
1281   mumps->id.rhs  = NULL;
1282 
1283   if (mumps->petsc_size == 1) {
1284     PetscScalar *aa;
1285     PetscInt     spnr, *ia, *ja;
1286     PetscBool    second_solve = PETSC_FALSE;
1287 
1288     PetscCall(MatDenseGetArray(X, &array));
1289     mumps->id.rhs = (MumpsScalar *)array;
1290 
1291     if (denseB) {
1292       /* copy B to X */
1293       PetscCall(MatDenseGetArrayRead(B, &rbray));
1294       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1295       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1296     } else { /* sparse B */
1297       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1298       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1299       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1300       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1301       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1302     }
1303     /* handle condensation step of Schur complement (if any) */
1304     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1305       second_solve = PETSC_TRUE;
1306       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1307     }
1308     /* solve phase */
1309     /*-------------*/
1310     mumps->id.job = JOB_SOLVE;
1311     PetscMUMPS_c(mumps);
1312     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1313 
1314     /* handle expansion step of Schur complement (if any) */
1315     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1316     if (!denseB) { /* sparse B */
1317       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1318       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1319       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1320     }
1321     PetscCall(MatDenseRestoreArray(X, &array));
1322     PetscFunctionReturn(0);
1323   }
1324 
1325   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1326   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1327 
1328   /* create msol_loc to hold mumps local solution */
1329   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1330   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1331 
1332   lsol_loc  = mumps->id.lsol_loc;
1333   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1334   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1335   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1336   mumps->id.isol_loc = isol_loc;
1337 
1338   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1339 
1340   if (denseB) {
1341     if (mumps->ICNTL20 == 10) {
1342       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1343       PetscCall(MatDenseGetArrayRead(B, &rbray));
1344       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1345       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1346       PetscCall(MatGetLocalSize(B, &m, NULL));
1347       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1348     } else {
1349       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1350       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot use an MPI_Gather,
1351          resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B on
1352          rank 0, then re-arrange B into the desired order on rank 0, which is a local operation.
1353        */
1354 
1355       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1356       /* wrap dense rhs matrix B into a vector v_mpi */
1357       PetscCall(MatGetLocalSize(B, &m, NULL));
1358       PetscCall(MatDenseGetArray(B, &bray));
1359       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1360       PetscCall(MatDenseRestoreArray(B, &bray));
1361 
1362       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1363       if (!mumps->myid) {
1364         PetscInt *idx;
1365         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1366         PetscCall(PetscMalloc1(nrhs * M, &idx));
1367         PetscCall(MatGetOwnershipRanges(B, &rstart));
1368         k = 0;
1369         for (proc = 0; proc < mumps->petsc_size; proc++) {
1370           for (j = 0; j < nrhs; j++) {
1371             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1372           }
1373         }
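        /* Illustration (added for clarity, not part of the original comments): with 2 ranks owning rows [0,2) and
           [2,4) of B and nrhs = 2, the entry of v_mpi holding B(0,1) sits at position 0 (rank offset) + 1*2 + 0 = 2,
           column-major within rank 0's block, and is scattered to b_seq[1*M + 0] = b_seq[4] (M = 4 here); b_seq thus
           ends up holding B in global column-major order, which is what MUMPS expects for a centralized dense rhs. */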
1374 
1375         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1376         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1377         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1378       } else {
1379         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1380         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1381         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1382       }
1383       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1384       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1385       PetscCall(ISDestroy(&is_to));
1386       PetscCall(ISDestroy(&is_from));
1387       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1388 
1389       if (!mumps->myid) { /* define rhs on the host */
1390         PetscCall(VecGetArray(b_seq, &bray));
1391         mumps->id.rhs = (MumpsScalar *)bray;
1392         PetscCall(VecRestoreArray(b_seq, &bray));
1393       }
1394     }
1395   } else { /* sparse B */
1396     b = (Mat_MPIAIJ *)Bt->data;
1397 
1398     /* wrap dense X into a vector v_mpi */
1399     PetscCall(MatGetLocalSize(X, &m, NULL));
1400     PetscCall(MatDenseGetArray(X, &bray));
1401     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1402     PetscCall(MatDenseRestoreArray(X, &bray));
1403 
1404     if (!mumps->myid) {
1405       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1406       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1407       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1408       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1409       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1410     } else {
1411       mumps->id.irhs_ptr    = NULL;
1412       mumps->id.irhs_sparse = NULL;
1413       mumps->id.nz_rhs      = 0;
1414       mumps->id.rhs_sparse  = NULL;
1415     }
1416   }
1417 
1418   /* solve phase */
1419   /*-------------*/
1420   mumps->id.job = JOB_SOLVE;
1421   PetscMUMPS_c(mumps);
1422   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1423 
1424   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1425   PetscCall(MatDenseGetArray(X, &array));
1426   PetscCall(VecPlaceArray(v_mpi, array));
1427 
1428   /* create scatter scat_sol */
1429   PetscCall(MatGetOwnershipRanges(X, &rstart));
1430   /* idxx[]: for each entry of the mumps local solution, its destination index in v_mpi (i.e. in X) */
1431 
1432   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1433   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
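  /* Worked example (added for clarity, consistent with the formulas below): with 2 ranks owning rows [0,2) and [2,4)
     of X and nrhs = 2, an entry with isol_loc[i] = 3 (after the 1-based -> 0-based shift) belongs to the rank owning
     rows [2,4), so k = 3-2 = 1, iidx = 1 + 2*2 = 5 and m = 2; the copy for the j-th rhs then goes to v_mpi index
     iidx + j*m, i.e. 5 and 7, matching the per-rank column-major storage of X that v_mpi wraps. */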
1434   for (i = 0; i < lsol_loc; i++) {
1435     isol_loc[i] -= 1; /* convert Fortran 1-based to C 0-based indexing. sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs vector */
1436 
1437     for (proc = 0; proc < mumps->petsc_size; proc++) {
1438       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1439         myrstart = rstart[proc];
1440         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1441         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1442         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1443         break;
1444       }
1445     }
1446 
1447     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1448   }
1449   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1450   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1451   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1452   PetscCall(ISDestroy(&is_from));
1453   PetscCall(ISDestroy(&is_to));
1454   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1455   PetscCall(MatDenseRestoreArray(X, &array));
1456 
1457   /* free spaces */
1458   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1459   mumps->id.isol_loc = isol_loc_save;
1460 
1461   PetscCall(PetscFree2(sol_loc, isol_loc));
1462   PetscCall(PetscFree(idxx));
1463   PetscCall(VecDestroy(&msol_loc));
1464   PetscCall(VecDestroy(&v_mpi));
1465   if (!denseB) {
1466     if (!mumps->myid) {
1467       b = (Mat_MPIAIJ *)Bt->data;
1468       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1469       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1470       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1471     }
1472   } else {
1473     if (mumps->ICNTL20 == 0) {
1474       PetscCall(VecDestroy(&b_seq));
1475       PetscCall(VecScatterDestroy(&scat_rhs));
1476     }
1477   }
1478   PetscCall(VecScatterDestroy(&scat_sol));
1479   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1484 {
1485   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1486   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1487 
1488   PetscFunctionBegin;
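  /* Per the MUMPS convention, ICNTL(9) != 1 makes the solve phase work on the transposed system A^T x = b;
     the previous value is saved above and restored below so later MatMatSolve() calls are unaffected */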
1489   mumps->id.ICNTL(9) = 0;
1490   PetscCall(MatMatSolve_MUMPS(A, B, X));
1491   mumps->id.ICNTL(9) = oldvalue;
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1496 {
1497   PetscBool flg;
1498   Mat       B;
1499 
1500   PetscFunctionBegin;
1501   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1502   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1503 
1504   /* Create B=Bt^T that uses Bt's data structure */
1505   PetscCall(MatCreateTranspose(Bt, &B));
1506 
1507   PetscCall(MatMatSolve_MUMPS(A, B, X));
1508   PetscCall(MatDestroy(&B));
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #if !defined(PETSC_USE_COMPLEX)
1513 /*
1514   input:
1515    F:        numeric factor
1516   output:
1517    nneg:     total number of negative pivots
1518    nzero:    total number of zero pivots
1519    npos:     (global dimension of F) - nneg - nzero
1520 */
1521 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1522 {
1523   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1524   PetscMPIInt size;
1525 
1526   PetscFunctionBegin;
1527   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1528   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1529   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1530 
1531   if (nneg) *nneg = mumps->id.INFOG(12);
1532   if (nzero || npos) {
1533     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1534     if (nzero) *nzero = mumps->id.INFOG(28);
1535     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
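    /* e.g. for a 10x10 factor with INFOG(12) = 2 negative pivots and INFOG(28) = 1 null pivot, npos = 10 - (2+1) = 7;
       in parallel, run with -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1 so that the two checks above pass */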
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 #endif
1540 
1541 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1542 {
1543   PetscInt       i, nreqs;
1544   PetscMUMPSInt *irn, *jcn;
1545   PetscMPIInt    count;
1546   PetscInt64     totnnz, remain;
1547   const PetscInt osize = mumps->omp_comm_size;
1548   PetscScalar   *val;
1549 
1550   PetscFunctionBegin;
1551   if (osize > 1) {
1552     if (reuse == MAT_INITIAL_MATRIX) {
1553       /* master first gathers counts of nonzeros to receive */
1554       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1555       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1556 
1557       /* Then each rank computes its number of sends/recvs */
1558       if (mumps->is_omp_master) {
1559         /* Start from rank 1 since self-communication is not done via MPI */
1560         nreqs = 0;
1561         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1562       } else {
1563         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1564       }
1565       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1566 
1567       /* The following code does a simple thing: the omp_master rank gathers irn/jcn/val from the other ranks.
1568          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz might even be
1569          a prime number > 2^31-1, we have to slice the messages into chunks of at most PETSC_MPI_INT_MAX entries.
1570          Since omp_comm_size is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1571        */
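      /* For example (an illustration of the slicing above): a rank with nnz = 5e9 posts ceil(5e9 / (2^31-1)) = 3
         MPI_Isend's for each of irn, jcn and val, matching the nreqs estimate computed earlier. */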
1572       nreqs = 0; /* counter for actual send/recvs */
1573       if (mumps->is_omp_master) {
1574         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1575         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1576         PetscCall(PetscMalloc1(totnnz, &val));
1577 
1578         /* Self communication */
1579         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1580         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1581         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1582 
1583         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1584         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1585         PetscCall(PetscFree(mumps->val_alloc));
1586         mumps->nnz = totnnz;
1587         mumps->irn = irn;
1588         mumps->jcn = jcn;
1589         mumps->val = mumps->val_alloc = val;
1590 
1591         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1592         jcn += mumps->recvcount[0];
1593         val += mumps->recvcount[0];
1594 
1595         /* Remote communication */
1596         for (i = 1; i < osize; i++) {
1597           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1598           remain = mumps->recvcount[i] - count;
1599           while (count > 0) {
1600             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1601             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1602             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1603             irn += count;
1604             jcn += count;
1605             val += count;
1606             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1607             remain -= count;
1608           }
1609         }
1610       } else {
1611         irn    = mumps->irn;
1612         jcn    = mumps->jcn;
1613         val    = mumps->val;
1614         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1615         remain = mumps->nnz - count;
1616         while (count > 0) {
1617           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1618           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1619           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1620           irn += count;
1621           jcn += count;
1622           val += count;
1623           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1624           remain -= count;
1625         }
1626       }
1627     } else {
1628       nreqs = 0;
1629       if (mumps->is_omp_master) {
1630         val = mumps->val + mumps->recvcount[0];
1631         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1632           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1633           remain = mumps->recvcount[i] - count;
1634           while (count > 0) {
1635             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1636             val += count;
1637             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1638             remain -= count;
1639           }
1640         }
1641       } else {
1642         val    = mumps->val;
1643         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1644         remain = mumps->nnz - count;
1645         while (count > 0) {
1646           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1647           val += count;
1648           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1649           remain -= count;
1650         }
1651       }
1652     }
1653     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1654     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag; bump it for the next round of communication */
1655   }
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1660 {
1661   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1662   PetscBool  isMPIAIJ;
1663 
1664   PetscFunctionBegin;
1665   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip numerical factorization if the analysis failed, except for the benign empty-matrix case (INFOG(1)=-16 with INFO(2)=0) */
1666     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1667     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1668     PetscFunctionReturn(0);
1669   }
1670 
1671   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1672   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1673 
1674   /* numerical factorization phase */
1675   /*-------------------------------*/
1676   mumps->id.job = JOB_FACTNUMERIC;
1677   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1678     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1679   } else {
1680     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1681   }
1682   PetscMUMPS_c(mumps);
1683   if (mumps->id.INFOG(1) < 0) {
1684     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1685     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1686       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1687       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1688     } else if (mumps->id.INFOG(1) == -13) {
1689       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1690       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1691     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1692       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1693       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694     } else {
1695       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1696       F->factorerrortype = MAT_FACTOR_OTHER;
1697     }
1698   }
1699   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1700 
1701   F->assembled = PETSC_TRUE;
1702 
1703   if (F->schur) { /* reset Schur status to unfactored */
1704 #if defined(PETSC_HAVE_CUDA)
1705     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1706 #endif
1707     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1708       mumps->id.ICNTL(19) = 2;
1709       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1710     }
1711     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1712   }
1713 
1714   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1715   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1716 
1717   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1718   if (mumps->petsc_size > 1) {
1719     PetscInt     lsol_loc;
1720     PetscScalar *sol_loc;
1721 
1722     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1723 
1724     /* distributed solution; Create x_seq=sol_loc for repeated use */
1725     if (mumps->x_seq) {
1726       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1727       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1728       PetscCall(VecDestroy(&mumps->x_seq));
1729     }
1730     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1731     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1732     mumps->id.lsol_loc = lsol_loc;
1733     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1734     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1735   }
1736   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 /* Sets MUMPS options from the options database */
1741 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1742 {
1743   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1744   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1745   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1746   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1747   MumpsScalar  *arr;
1748 
1749   PetscFunctionBegin;
1750   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1751   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1752     PetscInt nthreads   = 0;
1753     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1754     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1755 
1756     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1757     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1758     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1759 
1760     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1761     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1762     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1763     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1764     if (mumps->use_petsc_omp_support) {
1765       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1766                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1767       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1768 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1769       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1770       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1771 #endif
1772     } else {
1773       mumps->omp_comm      = PETSC_COMM_SELF;
1774       mumps->mumps_comm    = mumps->petsc_comm;
1775       mumps->is_omp_master = PETSC_TRUE;
1776     }
1777     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1778     mumps->reqs = NULL;
1779     mumps->tag  = 0;
1780 
1781     if (mumps->mumps_comm != MPI_COMM_NULL) {
1782       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1783         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1784         MPI_Comm comm;
1785         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1786         mumps->mumps_comm = comm;
1787       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1788     }
1789 
1790     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1791     mumps->id.job          = JOB_INIT;
1792     mumps->id.par          = 1; /* host participates in factorization and solve */
1793     mumps->id.sym          = mumps->sym;
1794 
1795     size          = mumps->id.size_schur;
1796     arr           = mumps->id.schur;
1797     listvar_schur = mumps->id.listvar_schur;
1798     PetscMUMPS_c(mumps);
1799     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1800     /* restore cached ICNTL and CNTL values */
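    /* (note added for clarity) ICNTL_pre/CNTL_pre are laid out as [count, index_1, value_1, index_2, value_2, ...];
       they presumably cache MatMumpsSetIcntl()/MatMumpsSetCntl() calls made before this first JOB_INIT call */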
1801     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1802     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1803     PetscCall(PetscFree(mumps->ICNTL_pre));
1804     PetscCall(PetscFree(mumps->CNTL_pre));
1805 
1806     if (schur) {
1807       mumps->id.size_schur    = size;
1808       mumps->id.schur_lld     = size;
1809       mumps->id.schur         = arr;
1810       mumps->id.listvar_schur = listvar_schur;
1811       if (mumps->petsc_size > 1) {
1812         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1813 
1814         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1815         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1816         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1817         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1818       } else {
1819         if (F->factortype == MAT_FACTOR_LU) {
1820           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1821         } else {
1822           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1823         }
1824       }
1825       mumps->id.ICNTL(26) = -1;
1826     }
1827 
1828     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1829        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1830      */
1831     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1832     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1833 
1834     mumps->scat_rhs = NULL;
1835     mumps->scat_sol = NULL;
1836 
1837     /* set PETSc-MUMPS default options - override MUMPS default */
1838     mumps->id.ICNTL(3) = 0;
1839     mumps->id.ICNTL(4) = 0;
1840     if (mumps->petsc_size == 1) {
1841       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1842       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1843     } else {
1844       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1845       mumps->id.ICNTL(21) = 1; /* distributed solution */
1846     }
1847   }
1848   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1849   if (flg) mumps->id.ICNTL(1) = icntl;
1850   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1851   if (flg) mumps->id.ICNTL(2) = icntl;
1852   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1853   if (flg) mumps->id.ICNTL(3) = icntl;
1854 
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1856   if (flg) mumps->id.ICNTL(4) = icntl;
1857   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1858 
1859   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1860   if (flg) mumps->id.ICNTL(6) = icntl;
1861 
1862   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1863   if (flg) {
1864     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1865     mumps->id.ICNTL(7) = icntl;
1866   }
1867 
1868   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1869   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1870   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1871   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1872   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1873   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1874   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1875   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1876   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1877   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1878   if (flg) {
1879     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1880     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1881   }
1882   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1883   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1884     PetscCall(MatDestroy(&F->schur));
1885     PetscCall(MatMumpsResetSchur_Private(mumps));
1886   }
1887 
1888   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1889      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1890      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1891      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1892      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1893      In short, we could not use distributed RHS with MPICH until v4.0b1.
1894    */
1895 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1896   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1897 #else
1898   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1899 #endif
1900   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1901   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1902 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1903   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1904 #endif
1905   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1906 
1907   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1910   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1911 
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1915   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1916   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1917   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1919   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1922   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1923   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1924 
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1927   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1928   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1929   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1930   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1931 
1932   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1933 
1934   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1935   if (ninfo) {
1936     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1937     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1938     mumps->ninfo = ninfo;
1939     for (i = 0; i < ninfo; i++) {
1940       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1941       mumps->info[i] = info[i];
1942     }
1943   }
1944   PetscOptionsEnd();
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1949 {
1950   PetscFunctionBegin;
1951   if (mumps->id.INFOG(1) < 0) {
1952     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1953     if (mumps->id.INFOG(1) == -6) {
1954       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1955       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1956     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1957       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1958       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1959     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */
1960       PetscCall(PetscInfo(F, "Empty matrix\n"));
1961     } else {
1962       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1963       F->factorerrortype = MAT_FACTOR_OTHER;
1964     }
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1970 {
1971   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1972   Vec            b;
1973   const PetscInt M = A->rmap->N;
1974 
1975   PetscFunctionBegin;
1976   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1977     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1978     PetscFunctionReturn(0);
1979   }
1980 
1981   /* Set MUMPS options from the options database */
1982   PetscCall(MatSetFromOptions_MUMPS(F, A));
1983 
1984   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1985   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1986 
1987   /* analysis phase */
1988   /*----------------*/
1989   mumps->id.job = JOB_FACTSYMBOLIC;
1990   mumps->id.n   = M;
1991   switch (mumps->id.ICNTL(18)) {
1992   case 0: /* centralized assembled matrix input */
1993     if (!mumps->myid) {
1994       mumps->id.nnz = mumps->nnz;
1995       mumps->id.irn = mumps->irn;
1996       mumps->id.jcn = mumps->jcn;
1997       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1998       if (r) {
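        /* ICNTL(7) = 1 tells MUMPS to use the ordering supplied in perm_in (per the MUMPS documentation);
           perm_in is filled from the PETSc IS r just below */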
1999         mumps->id.ICNTL(7) = 1;
2000         if (!mumps->myid) {
2001           const PetscInt *idx;
2002           PetscInt        i;
2003 
2004           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2005           PetscCall(ISGetIndices(r, &idx));
2006           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2007           PetscCall(ISRestoreIndices(r, &idx));
2008         }
2009       }
2010     }
2011     break;
2012   case 3: /* distributed assembled matrix input (size>1) */
2013     mumps->id.nnz_loc = mumps->nnz;
2014     mumps->id.irn_loc = mumps->irn;
2015     mumps->id.jcn_loc = mumps->jcn;
2016     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2017     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2018       PetscCall(MatCreateVecs(A, NULL, &b));
2019       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2020       PetscCall(VecDestroy(&b));
2021     }
2022     break;
2023   }
2024   PetscMUMPS_c(mumps);
2025   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2026 
2027   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2028   F->ops->solve             = MatSolve_MUMPS;
2029   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2030   F->ops->matsolve          = MatMatSolve_MUMPS;
2031   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2032   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2033 
2034   mumps->matstruc = SAME_NONZERO_PATTERN;
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 /* Note the Petsc r and c permutations are ignored */
2039 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2040 {
2041   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2042   Vec            b;
2043   const PetscInt M = A->rmap->N;
2044 
2045   PetscFunctionBegin;
2046   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2047     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2048     PetscFunctionReturn(0);
2049   }
2050 
2051   /* Set MUMPS options from the options database */
2052   PetscCall(MatSetFromOptions_MUMPS(F, A));
2053 
2054   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2055   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2056 
2057   /* analysis phase */
2058   /*----------------*/
2059   mumps->id.job = JOB_FACTSYMBOLIC;
2060   mumps->id.n   = M;
2061   switch (mumps->id.ICNTL(18)) {
2062   case 0: /* centralized assembled matrix input */
2063     if (!mumps->myid) {
2064       mumps->id.nnz = mumps->nnz;
2065       mumps->id.irn = mumps->irn;
2066       mumps->id.jcn = mumps->jcn;
2067       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2068     }
2069     break;
2070   case 3: /* distributed assembled matrix input (size>1) */
2071     mumps->id.nnz_loc = mumps->nnz;
2072     mumps->id.irn_loc = mumps->irn;
2073     mumps->id.jcn_loc = mumps->jcn;
2074     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2075     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2076       PetscCall(MatCreateVecs(A, NULL, &b));
2077       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2078       PetscCall(VecDestroy(&b));
2079     }
2080     break;
2081   }
2082   PetscMUMPS_c(mumps);
2083   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2084 
2085   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2086   F->ops->solve             = MatSolve_MUMPS;
2087   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2088   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2089 
2090   mumps->matstruc = SAME_NONZERO_PATTERN;
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 /* Note the Petsc r permutation and factor info are ignored */
2095 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2096 {
2097   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2098   Vec            b;
2099   const PetscInt M = A->rmap->N;
2100 
2101   PetscFunctionBegin;
2102   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2103     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2104     PetscFunctionReturn(0);
2105   }
2106 
2107   /* Set MUMPS options from the options database */
2108   PetscCall(MatSetFromOptions_MUMPS(F, A));
2109 
2110   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2111   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2112 
2113   /* analysis phase */
2114   /*----------------*/
2115   mumps->id.job = JOB_FACTSYMBOLIC;
2116   mumps->id.n   = M;
2117   switch (mumps->id.ICNTL(18)) {
2118   case 0: /* centralized assembled matrix input */
2119     if (!mumps->myid) {
2120       mumps->id.nnz = mumps->nnz;
2121       mumps->id.irn = mumps->irn;
2122       mumps->id.jcn = mumps->jcn;
2123       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2124     }
2125     break;
2126   case 3: /* distributed assembled matrix input (size>1) */
2127     mumps->id.nnz_loc = mumps->nnz;
2128     mumps->id.irn_loc = mumps->irn;
2129     mumps->id.jcn_loc = mumps->jcn;
2130     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2131     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2132       PetscCall(MatCreateVecs(A, NULL, &b));
2133       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2134       PetscCall(VecDestroy(&b));
2135     }
2136     break;
2137   }
2138   PetscMUMPS_c(mumps);
2139   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2140 
2141   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2142   F->ops->solve                 = MatSolve_MUMPS;
2143   F->ops->solvetranspose        = MatSolve_MUMPS;
2144   F->ops->matsolve              = MatMatSolve_MUMPS;
2145   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2146   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2147 #if defined(PETSC_USE_COMPLEX)
2148   F->ops->getinertia = NULL;
2149 #else
2150   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2151 #endif
2152 
2153   mumps->matstruc = SAME_NONZERO_PATTERN;
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2158 {
2159   PetscBool         iascii;
2160   PetscViewerFormat format;
2161   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2162 
2163   PetscFunctionBegin;
2164   /* check if matrix is mumps type */
2165   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2166 
2167   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2168   if (iascii) {
2169     PetscCall(PetscViewerGetFormat(viewer, &format));
2170     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2171       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2172       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2173         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2174         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2175         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2176         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2177         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2178         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2179         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2180         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2181         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering): %d\n", mumps->id.ICNTL(7)));
2182         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2183         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2184         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2185         if (mumps->id.ICNTL(11) > 0) {
2186           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2187           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2188           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2189           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2190           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2191           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2192         }
2193         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2194         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2195         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2196         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2197         /* ICNTL(16) and ICNTL(17) not used */
2198         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2199         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2200         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2201         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2202         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2203         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2204 
2205         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2206         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2207         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2208         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2209         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2210         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2211 
2212         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2213         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors are discarded in the solve phase):   %d\n", mumps->id.ICNTL(31)));
2214         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2215         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2216         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2217         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2218 
2219         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2220         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2221         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2222         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2223         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2225 
2226         /* information local to each processor */
2227         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2228         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2229         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2230         PetscCall(PetscViewerFlush(viewer));
2231         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2232         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2233         PetscCall(PetscViewerFlush(viewer));
2234         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2235         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2236         PetscCall(PetscViewerFlush(viewer));
2237 
2238         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running the numerical factorization):\n"));
2239         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2240         PetscCall(PetscViewerFlush(viewer));
2241 
2242         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during the numerical factorization):\n"));
2243         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2244         PetscCall(PetscViewerFlush(viewer));
2245 
2246         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2247         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2248         PetscCall(PetscViewerFlush(viewer));
2249 
2250         if (mumps->ninfo && mumps->ninfo <= 80) {
2251           PetscInt i;
2252           for (i = 0; i < mumps->ninfo; i++) {
2253             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2254             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2255             PetscCall(PetscViewerFlush(viewer));
2256           }
2257         }
2258         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2259       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2260 
2261       if (mumps->myid == 0) { /* information from the host */
2262         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2266 
2267         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2294         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2295         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2296         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2297         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2298         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2299         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2300         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2301       }
2302     }
2303   }
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2308 {
2309   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2310 
2311   PetscFunctionBegin;
2312   info->block_size        = 1.0;
2313   info->nz_allocated      = mumps->id.INFOG(20);
2314   info->nz_used           = mumps->id.INFOG(20);
2315   info->nz_unneeded       = 0.0;
2316   info->assemblies        = 0.0;
2317   info->mallocs           = 0.0;
2318   info->memory            = 0.0;
2319   info->fill_ratio_given  = 0;
2320   info->fill_ratio_needed = 0;
2321   info->factor_mallocs    = 0;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 /* -------------------------------------------------------------------------------------------*/
2326 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2327 {
2328   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2329   const PetscScalar *arr;
2330   const PetscInt    *idxs;
2331   PetscInt           size, i;
2332 
2333   PetscFunctionBegin;
2334   PetscCall(ISGetLocalSize(is, &size));
2335   /* Schur complement matrix */
2336   PetscCall(MatDestroy(&F->schur));
2337   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2338   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2339   mumps->id.schur      = (MumpsScalar *)arr;
2340   mumps->id.size_schur = size;
2341   mumps->id.schur_lld  = size;
2342   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2343   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2344 
2345   /* MUMPS expects Fortran style indices */
2346   PetscCall(PetscFree(mumps->id.listvar_schur));
2347   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2348   PetscCall(ISGetIndices(is, &idxs));
2349   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2350   PetscCall(ISRestoreIndices(is, &idxs));
2351   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2352   mumps->id.ICNTL(26) = -1;
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 /* -------------------------------------------------------------------------------------------*/
2357 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2358 {
2359   Mat          St;
2360   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2361   PetscScalar *array;
2362 #if defined(PETSC_USE_COMPLEX)
2363   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2364 #endif
2365 
2366   PetscFunctionBegin;
2367   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2368   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2369   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2370   PetscCall(MatSetType(St, MATDENSE));
2371   PetscCall(MatSetUp(St));
2372   PetscCall(MatDenseGetArray(St, &array));
2373   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2374     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2375       PetscInt i, j, N = mumps->id.size_schur;
2376       for (i = 0; i < N; i++) {
2377         for (j = 0; j < N; j++) {
2378 #if !defined(PETSC_USE_COMPLEX)
2379           PetscScalar val = mumps->id.schur[i * N + j];
2380 #else
2381           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2382 #endif
2383           array[j * N + i] = val;
2384         }
2385       }
2386     } else { /* stored by columns */
2387       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2388     }
2389   } else {                          /* either full or lower-triangular (not packed) */
2390     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2391       PetscInt i, j, N = mumps->id.size_schur;
2392       for (i = 0; i < N; i++) {
2393         for (j = i; j < N; j++) {
2394 #if !defined(PETSC_USE_COMPLEX)
2395           PetscScalar val = mumps->id.schur[i * N + j];
2396 #else
2397           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2398 #endif
2399           array[i * N + j] = val;
2400           array[j * N + i] = val;
2401         }
2402       }
2403     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2404       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2405     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2406       PetscInt i, j, N = mumps->id.size_schur;
2407       for (i = 0; i < N; i++) {
2408         for (j = 0; j < i + 1; j++) {
2409 #if !defined(PETSC_USE_COMPLEX)
2410           PetscScalar val = mumps->id.schur[i * N + j];
2411 #else
2412           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2413 #endif
2414           array[i * N + j] = val;
2415           array[j * N + i] = val;
2416         }
2417       }
2418     }
2419   }
2420   PetscCall(MatDenseRestoreArray(St, &array));
2421   *S = St;
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 /* -------------------------------------------------------------------------------------------*/
2426 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2427 {
2428   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2429 
2430   PetscFunctionBegin;
2431   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2432     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2433     for (i = 0; i < nICNTL_pre; ++i)
2434       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2435     if (i == nICNTL_pre) {                             /* not already cached */
2436       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2437       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2438       mumps->ICNTL_pre[0]++;
2439     }
2440     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2441     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2442   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2447 {
2448   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2449 
2450   PetscFunctionBegin;
2451   *ival = mumps->id.ICNTL(icntl);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 /*@
2456   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2457 
2458    Logically Collective
2459 
2460    Input Parameters:
2461 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2462 .  icntl - index of MUMPS parameter array ICNTL()
2463 -  ival - value of MUMPS ICNTL(icntl)
2464 
2465   Options Database Key:
2466 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2467 
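  Example Usage:
  A minimal sketch (assuming A is an assembled `MATAIJ` matrix; the variable names are illustrative); it raises ICNTL(14), the percentage increase in the estimated working space, before the factorization is computed
$     Mat F;
$     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
$     PetscCall(MatMumpsSetIcntl(F, 14, 30));
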
2468    Level: beginner
2469 
2470    References:
2471 .  * - MUMPS Users' Guide
2472 
2473 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2474 @*/
2475 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2476 {
2477   PetscFunctionBegin;
2478   PetscValidType(F, 1);
2479   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2480   PetscValidLogicalCollectiveInt(F, icntl, 2);
2481   PetscValidLogicalCollectiveInt(F, ival, 3);
2482   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2483   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 /*@
2488   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2489 
2490    Logically Collective
2491 
2492    Input Parameters:
2493 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2494 -  icntl - index of MUMPS parameter array ICNTL()
2495 
2496   Output Parameter:
2497 .  ival - value of MUMPS ICNTL(icntl)
2498 
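  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface); it reads back the value currently stored in ICNTL(24), the null-pivot detection flag
$     PetscInt ival;
$     PetscCall(MatMumpsGetIcntl(F, 24, &ival));
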
2499    Level: beginner
2500 
2501    References:
2502 .  * - MUMPS Users' Guide
2503 
2504 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2505 @*/
2506 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2507 {
2508   PetscFunctionBegin;
2509   PetscValidType(F, 1);
2510   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2511   PetscValidLogicalCollectiveInt(F, icntl, 2);
2512   PetscValidIntPointer(ival, 3);
2513   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2514   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 /* -------------------------------------------------------------------------------------------*/
2519 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2520 {
2521   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2522 
2523   PetscFunctionBegin;
2524   if (mumps->id.job == JOB_NULL) {
2525     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2526     for (i = 0; i < nCNTL_pre; ++i)
2527       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2528     if (i == nCNTL_pre) {
2529       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2530       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2531       mumps->CNTL_pre[0]++;
2532     }
2533     mumps->CNTL_pre[1 + 2 * i] = icntl;
2534     mumps->CNTL_pre[2 + 2 * i] = val;
2535   } else mumps->id.CNTL(icntl) = val;
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2540 {
2541   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2542 
2543   PetscFunctionBegin;
2544   *val = mumps->id.CNTL(icntl);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /*@
2549   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2550 
2551    Logically Collective
2552 
2553    Input Parameters:
2554 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2555 .  icntl - index of MUMPS parameter array CNTL()
2556 -  val - value of MUMPS CNTL(icntl)
2557 
2558   Options Database Key:
2559 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2560 
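  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface and the factorization has not yet been performed); it sets CNTL(1), the relative pivoting threshold, to 0.01
$     PetscCall(MatMumpsSetCntl(F, 1, 0.01));
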
2561    Level: beginner
2562 
2563    References:
2564 .  * - MUMPS Users' Guide
2565 
2566 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2567 @*/
2568 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2569 {
2570   PetscFunctionBegin;
2571   PetscValidType(F, 1);
2572   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2573   PetscValidLogicalCollectiveInt(F, icntl, 2);
2574   PetscValidLogicalCollectiveReal(F, val, 3);
2575   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2576   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 /*@
2581   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2582 
2583    Logically Collective
2584 
2585    Input Parameters:
2586 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2587 -  icntl - index of MUMPS parameter array CNTL()
2588 
2589   Output Parameter:
2590 .  val - value of MUMPS CNTL(icntl)
2591 
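  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface); it reads back CNTL(3), the absolute pivoting threshold
$     PetscReal val;
$     PetscCall(MatMumpsGetCntl(F, 3, &val));
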
2592    Level: beginner
2593 
2594    References:
2595 .  * - MUMPS Users' Guide
2596 
2597 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2598 @*/
2599 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2600 {
2601   PetscFunctionBegin;
2602   PetscValidType(F, 1);
2603   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2604   PetscValidLogicalCollectiveInt(F, icntl, 2);
2605   PetscValidRealPointer(val, 3);
2606   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2607   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2612 {
2613   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2614 
2615   PetscFunctionBegin;
2616   *info = mumps->id.INFO(icntl);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2621 {
2622   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2623 
2624   PetscFunctionBegin;
2625   *infog = mumps->id.INFOG(icntl);
2626   PetscFunctionReturn(0);
2627 }
2628 
2629 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2630 {
2631   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2632 
2633   PetscFunctionBegin;
2634   *rinfo = mumps->id.RINFO(icntl);
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2639 {
2640   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2641 
2642   PetscFunctionBegin;
2643   *rinfog = mumps->id.RINFOG(icntl);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2648 {
2649   Mat          Bt = NULL, Btseq = NULL;
2650   PetscBool    flg;
2651   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2652   PetscScalar *aa;
2653   PetscInt     spnr, *ia, *ja, M, nrhs;
2654 
2655   PetscFunctionBegin;
2656   PetscValidPointer(spRHS, 2);
2657   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2658   if (flg) {
2659     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2660   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be a MATTRANSPOSEVIRTUAL matrix");
2661 
2662   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2663 
2664   if (mumps->petsc_size > 1) {
2665     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2666     Btseq         = b->A;
2667   } else {
2668     Btseq = Bt;
2669   }
2670 
2671   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2672   mumps->id.nrhs = nrhs;
2673   mumps->id.lrhs = M;
2674   mumps->id.rhs  = NULL;
2675 
2676   if (!mumps->myid) {
2677     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2678     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2679     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2680     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2681     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2682   } else {
2683     mumps->id.irhs_ptr    = NULL;
2684     mumps->id.irhs_sparse = NULL;
2685     mumps->id.nz_rhs      = 0;
2686     mumps->id.rhs_sparse  = NULL;
2687   }
2688   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2689   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2690 
2691   /* solve phase */
2692   /*-------------*/
2693   mumps->id.job = JOB_SOLVE;
2694   PetscMUMPS_c(mumps);
2695   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2696 
2697   if (!mumps->myid) {
2698     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2699     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2700     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2701   }
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 /*@
2706   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2707 
2708    Logically Collective
2709 
2710    Input Parameters:
2711 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2712 -  spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding specified indices in processor[0]
2713 
2714   Output Parameter:
2715 . spRHS - requested entries of inverse of A
2716 
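  Example Usage:
  A minimal sketch (assuming spRHST is a sequential sparse `MATAIJ` matrix held on process 0 whose nonzero pattern marks the requested entries); the `MATTRANSPOSEVIRTUAL` wrapper required by this routine is created with `MatCreateTranspose()`
$     Mat spRHS;
$     PetscCall(MatCreateTranspose(spRHST, &spRHS));
$     PetscCall(MatMumpsGetInverse(F, spRHS));
$     PetscCall(MatDestroy(&spRHS));
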
2717    Level: beginner
2718 
2719    References:
2720 .  * - MUMPS Users' Guide
2721 
2722 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2723 @*/
2724 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2725 {
2726   PetscFunctionBegin;
2727   PetscValidType(F, 1);
2728   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2729   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2734 {
2735   Mat spRHS;
2736 
2737   PetscFunctionBegin;
2738   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2739   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2740   PetscCall(MatDestroy(&spRHS));
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 /*@
2745   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2746 
2747    Logically Collective
2748 
2749    Input Parameters:
2750 +  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2751 -  spRHST - sequential sparse matrix in `MATAIJ` format holding specified indices of A^T in processor[0]
2752 
2753   Output Parameter:
2754 . spRHST - requested entries of inverse of A^T
2755 
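  Example Usage:
  A minimal sketch (assuming spRHST is a sparse `MATAIJ` matrix whose nonzero pattern, held on process 0, marks the requested entries of inv(A^T)); the selected entries are returned in place in spRHST
$     PetscCall(MatMumpsGetInverseTranspose(F, spRHST));
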
2756    Level: beginner
2757 
2758    References:
2759 .  * - MUMPS Users' Guide
2760 
2761 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2762 @*/
2763 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2764 {
2765   PetscBool flg;
2766 
2767   PetscFunctionBegin;
2768   PetscValidType(F, 1);
2769   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2770   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2771   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2772 
2773   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 /*@
2778   MatMumpsGetInfo - Get MUMPS parameter INFO()
2779 
2780    Logically Collective
2781 
2782    Input Parameters:
2783 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2784 -  icntl - index of MUMPS parameter array INFO()
2785 
2786   Output Parameter:
2787 .  ival - value of MUMPS INFO(icntl)
2788 
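  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface, after factorization); it retrieves INFO(23), the number of pivots eliminated on this process
$     PetscInt ival;
$     PetscCall(MatMumpsGetInfo(F, 23, &ival));
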
2789    Level: beginner
2790 
2791    References:
2792 .  * - MUMPS Users' Guide
2793 
2794 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2795 @*/
2796 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2797 {
2798   PetscFunctionBegin;
2799   PetscValidType(F, 1);
2800   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2801   PetscValidIntPointer(ival, 3);
2802   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 /*@
2807   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2808 
2809    Logically Collective
2810 
2811    Input Parameters:
2812 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2813 -  icntl - index of MUMPS parameter array INFOG()
2814 
2815   Output Parameter:
2816 .  ival - value of MUMPS INFOG(icntl)
2817 
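  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface, after factorization); it retrieves INFOG(28), the number of null pivots encountered
$     PetscInt ival;
$     PetscCall(MatMumpsGetInfog(F, 28, &ival));
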
2818    Level: beginner
2819 
2820    References:
2821 .  * - MUMPS Users' Guide
2822 
2823 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2824 @*/
2825 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2826 {
2827   PetscFunctionBegin;
2828   PetscValidType(F, 1);
2829   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2830   PetscValidIntPointer(ival, 3);
2831   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 /*@
2836   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2837 
2838    Logically Collective
2839 
2840    Input Parameters:
2841 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2842 -  icntl - index of MUMPS parameter array RINFO()
2843 
2844   Output Parameter:
2845 .  val - value of MUMPS RINFO(icntl)
2846 
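  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface, after factorization); it retrieves RINFO(3), the local number of floating-point operations for the elimination
$     PetscReal val;
$     PetscCall(MatMumpsGetRinfo(F, 3, &val));
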
2847    Level: beginner
2848 
2849    References:
2850 .  * - MUMPS Users' Guide
2851 
2852 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2853 @*/
2854 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2855 {
2856   PetscFunctionBegin;
2857   PetscValidType(F, 1);
2858   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2859   PetscValidRealPointer(val, 3);
2860   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 /*@
2865   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2866 
2867    Logically Collective
2868 
2869    Input Parameters:
2870 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2871 -  icntl - index of MUMPS parameter array RINFOG()
2872 
2873   Output Parameter:
2874 .  val - value of MUMPS RINFOG(icntl)
2875 
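  Example Usage:
  A minimal sketch (assuming F is a factor matrix obtained from the PETSc-MUMPS interface, after factorization); it retrieves RINFOG(3), the global number of floating-point operations for the elimination
$     PetscReal val;
$     PetscCall(MatMumpsGetRinfog(F, 3, &val));
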
2876    Level: beginner
2877 
2878    References:
2879 .  * - MUMPS Users' Guide
2880 
2881 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2882 @*/
2883 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2884 {
2885   PetscFunctionBegin;
2886   PetscValidType(F, 1);
2887   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2888   PetscValidRealPointer(val, 3);
2889   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2890   PetscFunctionReturn(0);
2891 }
2892 
2893 /*MC
2894   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2895   distributed and sequential matrices via the external package MUMPS.
2896 
2897   Works with `MATAIJ` and `MATSBAIJ` matrices
2898 
2899   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2900 
2901   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2902 
2903   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2904 
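  For example, a minimal sketch (assuming ksp is an existing `KSP` for the linear system; the variable names are illustrative) that selects MUMPS programmatically instead of via the options database:
$     PC pc;
$     PetscCall(KSPGetPC(ksp, &pc));
$     PetscCall(PCSetType(pc, PCLU));
$     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
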
2905   Options Database Keys:
2906 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2907 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2908 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2909 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2910 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
2911 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2912                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2913 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2914 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2915 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2916 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2917 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2918 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2919 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2920 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2921 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2922 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2923 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
2924 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2925 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2926 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2927 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2928 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2929 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2930 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2931 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2932 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2933 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2934 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2935 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2936 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2937 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2938 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2939 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2940 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2941 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2942                                    If m is not provided, the default is the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2943 
2944   Level: beginner
2945 
2946     Notes:
2947     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will error if the matrix is Hermitian.
2948 
2949     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2950     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2951 
2952     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
2953 $          KSPGetPC(ksp,&pc);
2954 $          PCFactorGetMatrix(pc,&mat);
2955 $          MatMumpsGetInfo(mat,....);
2956 $          MatMumpsGetInfog(mat,....); etc.
2957            Alternatively, run with -ksp_error_if_not_converged and the program will stop with the information printed in the error message.
2958 
2959   Using MUMPS with 64-bit integers
2960     MUMPS provides 64-bit integer support in two build modes:
2961       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2962       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2963 
2964       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2965       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2966       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2967       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2968 
2969     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2970 
2971   Two modes to run MUMPS/PETSc with OpenMP
2972 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2973 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2974 
2975 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2976 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2977 
2978    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2979    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2980    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2981    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2982    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2983 
2984    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2985    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2986    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2987    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2988    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2989    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2990    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2991    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2992    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2993    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2994    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2995    examine the mapping result.
2996 
2997    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2998    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
2999    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3000 
3001    References:
3002 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3003 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3004 
3005 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3006 M*/
3007 
3008 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3009 {
3010   PetscFunctionBegin;
3011   *type = MATSOLVERMUMPS;
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 /* MatGetFactor for Seq and MPI AIJ matrices */
3016 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3017 {
3018   Mat         B;
3019   Mat_MUMPS  *mumps;
3020   PetscBool   isSeqAIJ;
3021   PetscMPIInt size;
3022 
3023   PetscFunctionBegin;
3024 #if defined(PETSC_USE_COMPLEX)
3025   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3026 #endif
3027   /* Create the factorization matrix */
3028   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3029   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3030   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3031   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3032   PetscCall(MatSetUp(B));
3033 
3034   PetscCall(PetscNew(&mumps));
3035 
3036   B->ops->view    = MatView_MUMPS;
3037   B->ops->getinfo = MatGetInfo_MUMPS;
3038 
3039   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3040   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3041   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3042   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3043   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3044   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3045   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3046   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3047   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3048   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3049   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3050   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3051   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3052 
3053   if (ftype == MAT_FACTOR_LU) {
3054     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3055     B->factortype            = MAT_FACTOR_LU;
3056     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3057     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3058     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3059     mumps->sym = 0;
3060   } else {
3061     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3062     B->factortype                  = MAT_FACTOR_CHOLESKY;
3063     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3064     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3065     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3066 #if defined(PETSC_USE_COMPLEX)
3067     mumps->sym = 2;
3068 #else
3069     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3070     else mumps->sym = 2;
3071 #endif
3072   }
3073 
3074   /* set solvertype */
3075   PetscCall(PetscFree(B->solvertype));
3076   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3077   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3078   if (size == 1) {
3079     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3080     B->canuseordering = PETSC_TRUE;
3081   }
3082   B->ops->destroy = MatDestroy_MUMPS;
3083   B->data         = (void *)mumps;
3084 
3085   *F               = B;
3086   mumps->id.job    = JOB_NULL;
3087   mumps->ICNTL_pre = NULL;
3088   mumps->CNTL_pre  = NULL;
3089   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3094 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3095 {
3096   Mat         B;
3097   Mat_MUMPS  *mumps;
3098   PetscBool   isSeqSBAIJ;
3099   PetscMPIInt size;
3100 
3101   PetscFunctionBegin;
3102 #if defined(PETSC_USE_COMPLEX)
3103   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3104 #endif
3105   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3106   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3107   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3108   PetscCall(MatSetUp(B));
3109 
3110   PetscCall(PetscNew(&mumps));
3111   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3112   if (isSeqSBAIJ) {
3113     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3114   } else {
3115     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3116   }
3117 
3118   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3119   B->ops->view                   = MatView_MUMPS;
3120   B->ops->getinfo                = MatGetInfo_MUMPS;
3121 
3122   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3123   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3124   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3134   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3135 
3136   B->factortype = MAT_FACTOR_CHOLESKY;
3137 #if defined(PETSC_USE_COMPLEX)
3138   mumps->sym = 2;
3139 #else
3140   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3141   else mumps->sym = 2;
3142 #endif
3143 
3144   /* set solvertype */
3145   PetscCall(PetscFree(B->solvertype));
3146   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3147   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3148   if (size == 1) {
3149     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3150     B->canuseordering = PETSC_TRUE;
3151   }
3152   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3153   B->ops->destroy = MatDestroy_MUMPS;
3154   B->data         = (void *)mumps;
3155 
3156   *F               = B;
3157   mumps->id.job    = JOB_NULL;
3158   mumps->ICNTL_pre = NULL;
3159   mumps->CNTL_pre  = NULL;
3160   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3165 {
3166   Mat         B;
3167   Mat_MUMPS  *mumps;
3168   PetscBool   isSeqBAIJ;
3169   PetscMPIInt size;
3170 
3171   PetscFunctionBegin;
3172   /* Create the factorization matrix */
3173   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3174   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3175   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3176   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3177   PetscCall(MatSetUp(B));
3178 
3179   PetscCall(PetscNew(&mumps));
3180   if (ftype == MAT_FACTOR_LU) {
3181     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3182     B->factortype            = MAT_FACTOR_LU;
3183     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3184     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3185     mumps->sym = 0;
3186     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3187   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3188 
3189   B->ops->view    = MatView_MUMPS;
3190   B->ops->getinfo = MatGetInfo_MUMPS;
3191 
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3205 
3206   /* set solvertype */
3207   PetscCall(PetscFree(B->solvertype));
3208   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3209   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3210   if (size == 1) {
3211     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3212     B->canuseordering = PETSC_TRUE;
3213   }
3214   B->ops->destroy = MatDestroy_MUMPS;
3215   B->data         = (void *)mumps;
3216 
3217   *F               = B;
3218   mumps->id.job    = JOB_NULL;
3219   mumps->ICNTL_pre = NULL;
3220   mumps->CNTL_pre  = NULL;
3221   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 /* MatGetFactor for Seq and MPI SELL matrices */
3226 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3227 {
3228   Mat         B;
3229   Mat_MUMPS  *mumps;
3230   PetscBool   isSeqSELL;
3231   PetscMPIInt size;
3232 
3233   PetscFunctionBegin;
3234   /* Create the factorization matrix */
3235   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3236   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3237   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3238   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3239   PetscCall(MatSetUp(B));
3240 
3241   PetscCall(PetscNew(&mumps));
3242 
3243   B->ops->view    = MatView_MUMPS;
3244   B->ops->getinfo = MatGetInfo_MUMPS;
3245 
3246   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3247   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3248   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3251   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3255   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3257 
3258   if (ftype == MAT_FACTOR_LU) {
3259     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3260     B->factortype            = MAT_FACTOR_LU;
3261     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3262     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3263     mumps->sym = 0;
3264     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3265   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3266 
3267   /* set solvertype */
3268   PetscCall(PetscFree(B->solvertype));
3269   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3270   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3271   if (size == 1) {
3272     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3273     B->canuseordering = PETSC_TRUE;
3274   }
3275   B->ops->destroy = MatDestroy_MUMPS;
3276   B->data         = (void *)mumps;
3277 
3278   *F               = B;
3279   mumps->id.job    = JOB_NULL;
3280   mumps->ICNTL_pre = NULL;
3281   mumps->CNTL_pre  = NULL;
3282   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3287 {
3288   PetscFunctionBegin;
3289   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3290   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3291   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3292   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3293   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3294   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3295   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3296   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3297   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3298   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3299   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3300   PetscFunctionReturn(0);
3301 }
3302