xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14   #if defined(PETSC_USE_REAL_SINGLE)
15     #include <cmumps_c.h>
16   #else
17     #include <zmumps_c.h>
18   #endif
19 #else
20   #if defined(PETSC_USE_REAL_SINGLE)
21     #include <smumps_c.h>
22   #else
23     #include <dmumps_c.h>
24   #endif
25 #endif
26 EXTERN_C_END
/* MUMPS job codes assigned to id.job before each PetscMUMPS_c() call (the "job" parameter of the MUMPS API) */
#define JOB_INIT         -1 /* initialize a MUMPS instance */
#define JOB_NULL         0  /* placeholder; no operation requested */
#define JOB_FACTSYMBOLIC 1  /* analysis / symbolic factorization */
#define JOB_FACTNUMERIC  2  /* numeric factorization */
#define JOB_SOLVE        3  /* forward/backward solve */
#define JOB_END          -2 /* terminate the instance and free its internal data */
33 
34 /* calls to MUMPS */
35 #if defined(PETSC_USE_COMPLEX)
36   #if defined(PETSC_USE_REAL_SINGLE)
37     #define MUMPS_c cmumps_c
38   #else
39     #define MUMPS_c zmumps_c
40   #endif
41 #else
42   #if defined(PETSC_USE_REAL_SINGLE)
43     #define MUMPS_c smumps_c
44   #else
45     #define MUMPS_c dmumps_c
46   #endif
47 #endif
48 
49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
50    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
51    naming convention in PetscMPIInt, PetscBLASInt etc.
52 */
53 typedef MUMPS_INT PetscMUMPSInt;
54 
55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
56   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
57     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
58   #endif
59 #else
60   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
61     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
62   #endif
63 #endif
64 
#define MPIU_MUMPSINT       MPI_INT    /* MPI datatype matching PetscMUMPSInt (MUMPS_INT is a 32-bit int here, see the check above) */
#define PETSC_MUMPS_INT_MAX 2147483647
/* Spell INT_MIN as (-INT_MAX - 1): the literal 2147483648 does not fit in an int, so the
   expression -2147483648 parses as a (negated) long/long long constant, not an int. That
   silently changes the type of expressions this macro participates in; the form below is
   the standard portable idiom and has type int. */
#define PETSC_MUMPS_INT_MIN (-2147483647 - 1)
68 
/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  /* only with 64-bit PetscInt can <a> fall outside the 32-bit MUMPS_INT range; with 32-bit PetscInt the cast below is always lossless */
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(PETSC_SUCCESS);
}
79 
/* Put these utility routines here since they are only used in this file */

/* Read a PetscMUMPSInt from the options database, range-checked against [lb, ub].
   On return, *value is overwritten only if the option was actually set; *set (may be NULL)
   reports whether it was. Mirrors the PetscOptionsInt() calling convention. */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Convenience wrapper with the full PetscMUMPSInt range as bounds; expects a PetscOptionsObject in scope (i.e. inside PetscOptionsBegin/End) */
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
93 
94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  /* OpenMP-support path: only omp_comm master ranks invoke MUMPS (inside an OMP region with FP traps
     disabled); afterwards the (r)info(g) result arrays are broadcast within the small shared-memory
     omp_comm so every rank observes consistent MUMPS status */
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  /* no PETSc OpenMP support: every rank calls MUMPS directly, with FP traps disabled around the call */
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
130 
131 /* declare MumpsScalar */
132 #if defined(PETSC_USE_COMPLEX)
133   #if defined(PETSC_USE_REAL_SINGLE)
134     #define MumpsScalar mumps_complex
135   #else
136     #define MumpsScalar mumps_double_complex
137   #endif
138 #else
139   #define MumpsScalar PetscScalar
140 #endif
141 
142 /* macros s.t. indices match MUMPS documentation */
143 #define ICNTL(I)  icntl[(I)-1]
144 #define CNTL(I)   cntl[(I)-1]
145 #define INFOG(I)  infog[(I)-1]
146 #define INFO(I)   info[(I)-1]
147 #define RINFOG(I) rinfog[(I)-1]
148 #define RINFO(I)  rinfo[(I)-1]
149 
150 typedef struct Mat_MUMPS Mat_MUMPS;
151 struct Mat_MUMPS {
152 #if defined(PETSC_USE_COMPLEX)
153   #if defined(PETSC_USE_REAL_SINGLE)
154   CMUMPS_STRUC_C id;
155   #else
156   ZMUMPS_STRUC_C id;
157   #endif
158 #else
159   #if defined(PETSC_USE_REAL_SINGLE)
160   SMUMPS_STRUC_C id;
161   #else
162   DMUMPS_STRUC_C id;
163   #endif
164 #endif
165 
166   MatStructure   matstruc;
167   PetscMPIInt    myid, petsc_size;
168   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
169   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
170   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
171   PetscMUMPSInt  sym;
172   MPI_Comm       mumps_comm;
173   PetscMUMPSInt *ICNTL_pre;
174   PetscReal     *CNTL_pre;
175   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
176   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
177   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
178   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
179 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
180   PetscInt    *rhs_nrow, max_nrhs;
181   PetscMPIInt *rhs_recvcounts, *rhs_disps;
182   PetscScalar *rhs_loc, *rhs_recvbuf;
183 #endif
184   Vec            b_seq, x_seq;
185   PetscInt       ninfo, *info; /* which INFO to display */
186   PetscInt       sizeredrhs;
187   PetscScalar   *schur_sol;
188   PetscInt       schur_sizesol;
189   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
190   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
191   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
192 
193   /* Support for MATNEST */
194   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
195   PetscInt64  *nest_vals_start;
196   PetscScalar *nest_vals;
197 
198   /* stuff used by petsc/mumps OpenMP support*/
199   PetscBool    use_petsc_omp_support;
200   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
201   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
202   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
203   PetscMPIInt  tag, omp_comm_size;
204   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
205   MPI_Request *reqs;
206 };
207 
208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
209    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
210  */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  /* 64-bit PetscInt: the indices must be narrowed element-by-element into the persistent
     ia_alloc/ja_alloc buffers, which grow monotonically (cur_ilen/cur_jlen track capacity)
     and are reused across calls */
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt is layout-compatible with PetscMUMPSInt, so the input arrays are aliased directly */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
241 
242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
243 {
244   PetscFunctionBegin;
245   PetscCall(PetscFree(mumps->id.listvar_schur));
246   PetscCall(PetscFree(mumps->id.redrhs));
247   PetscCall(PetscFree(mumps->schur_sol));
248   mumps->id.size_schur = 0;
249   mumps->id.schur_lld  = 0;
250   mumps->id.ICNTL(19)  = 0;
251   PetscFunctionReturn(PETSC_SUCCESS);
252 }
253 
254 /* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  /* wrap the reduced RHS already sitting in id.redrhs as a dense matrix B (no copy) */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* S holds a factorization: solve in place, with X aliasing the same id.redrhs storage as B */
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the explicit inverse: the solve becomes a matrix product into a separate buffer
       (schur_sol, grown on demand), then copied back into B/id.redrhs */
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}
315 
/* Drive the two halves of a MUMPS Schur-reduced solve: with expansion == PETSC_FALSE,
   make sure id.redrhs is large enough for the upcoming condensation; with PETSC_TRUE,
   solve the Schur system ourselves and run the MUMPS expansion phase (ICNTL(26) = 2). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
    }
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
350 
351 /*
352   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
353 
354   input:
355     A       - matrix in aij,baij or sbaij format
356     shift   - 0: C style output triple; 1: Fortran style output triple.
357     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
358               MAT_REUSE_MATRIX:   only the values in v array are updated
359   output:
360     nnz     - dim of r, c, and v (number of local nonzero entries of A)
361     r, c, v - row and col index, matrix values (matrix triples)
362 
363   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
364   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
365   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
366 
367  */
368 
/* SeqAIJ -> COO triples for an unsymmetric MUMPS factorization. Row/col indices are built
   once (MAT_INITIAL_MATRIX); values alias the AIJ data array directly except in the MATNEST
   case, where they are copied into the externally allocated buffer. */
static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscInt64         nz, rnz, i, j, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* expand the CSR row pointer into one (row, col) pair per nonzero; shift selects C (0) or Fortran (1) indexing */
    for (i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
402 
/* SeqSELL -> COO triples. NOTE(review): the row computation hard-codes a slice height of 8
   (the "& 0x07" wrap and "8 * i"); this assumes the SELL format's fixed 8-row slices -- confirm
   against Mat_SeqSELL if the slice height ever becomes configurable. */
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt64     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices]; /* total number of stored entries, padding included */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* entries within a slice are stored column-major over the 8 rows, hence r cycles 0..7 */
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
425 
/* SeqBAIJ -> COO triples: every bs x bs block is fully expanded into scalar entries,
   emitted in the block's column-major storage order so values can alias aa->a directly. */
static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs; /* number of block rows (sequential matrix, so global N equals local n) */
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        /* j indexes the block column, m the block row: matches the column-major layout of each bs2 block in aa->a */
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
462 
463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
464 {
465   const PetscInt *ai, *aj, *ajj;
466   PetscInt        bs;
467   PetscInt64      nz, rnz, i, j, k, m;
468   PetscMUMPSInt  *row, *col;
469   PetscScalar    *val;
470   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
471   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
472 #if defined(PETSC_USE_COMPLEX)
473   PetscBool isset, hermitian;
474 #endif
475 
476   PetscFunctionBegin;
477 #if defined(PETSC_USE_COMPLEX)
478   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
479   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
480 #endif
481   ai = aa->i;
482   aj = aa->j;
483   PetscCall(MatGetBlockSize(A, &bs));
484   if (reuse == MAT_INITIAL_MATRIX) {
485     const PetscInt64 alloc_size = aa->nz * bs2;
486 
487     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
488     if (bs > 1) {
489       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
490       mumps->val = mumps->val_alloc;
491     } else {
492       mumps->val = aa->a;
493     }
494     mumps->irn = row;
495     mumps->jcn = col;
496   } else {
497     row = mumps->irn;
498     col = mumps->jcn;
499   }
500   val = mumps->val;
501 
502   nz = 0;
503   if (bs > 1) {
504     for (i = 0; i < mbs; i++) {
505       rnz = ai[i + 1] - ai[i];
506       ajj = aj + ai[i];
507       for (j = 0; j < rnz; j++) {
508         for (k = 0; k < bs; k++) {
509           for (m = 0; m < bs; m++) {
510             if (ajj[j] > i || k >= m) {
511               if (reuse == MAT_INITIAL_MATRIX) {
512                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
513                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
514               }
515               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
516             }
517           }
518         }
519       }
520     }
521   } else if (reuse == MAT_INITIAL_MATRIX) {
522     for (i = 0; i < mbs; i++) {
523       rnz = ai[i + 1] - ai[i];
524       ajj = aj + ai[i];
525       for (j = 0; j < rnz; j++) {
526         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
527         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
528         nz++;
529       }
530     }
531     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
532   } else if (mumps->nest_vals)
533     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
534   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
535   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
540 {
541   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
542   PetscInt64         nz, rnz, i, j;
543   const PetscScalar *av, *v1;
544   PetscScalar       *val;
545   PetscMUMPSInt     *row, *col;
546   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
547   PetscBool          missing;
548 #if defined(PETSC_USE_COMPLEX)
549   PetscBool hermitian, isset;
550 #endif
551 
552   PetscFunctionBegin;
553 #if defined(PETSC_USE_COMPLEX)
554   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
555   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
556 #endif
557   PetscCall(MatSeqAIJGetArrayRead(A, &av));
558   ai    = aa->i;
559   aj    = aa->j;
560   adiag = aa->diag;
561   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
562   if (reuse == MAT_INITIAL_MATRIX) {
563     /* count nz in the upper triangular part of A */
564     nz = 0;
565     if (missing) {
566       for (i = 0; i < M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
568           for (j = ai[i]; j < ai[i + 1]; j++) {
569             if (aj[j] < i) continue;
570             nz++;
571           }
572         } else {
573           nz += ai[i + 1] - adiag[i];
574         }
575       }
576     } else {
577       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
578     }
579     PetscCall(PetscMalloc2(nz, &row, nz, &col));
580     PetscCall(PetscMalloc1(nz, &val));
581     mumps->nnz = nz;
582     mumps->irn = row;
583     mumps->jcn = col;
584     mumps->val = mumps->val_alloc = val;
585 
586     nz = 0;
587     if (missing) {
588       for (i = 0; i < M; i++) {
589         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
590           for (j = ai[i]; j < ai[i + 1]; j++) {
591             if (aj[j] < i) continue;
592             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
593             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
594             val[nz] = av[j];
595             nz++;
596           }
597         } else {
598           rnz = ai[i + 1] - adiag[i];
599           ajj = aj + adiag[i];
600           v1  = av + adiag[i];
601           for (j = 0; j < rnz; j++) {
602             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
603             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
604             val[nz++] = v1[j];
605           }
606         }
607       }
608     } else {
609       for (i = 0; i < M; i++) {
610         rnz = ai[i + 1] - adiag[i];
611         ajj = aj + adiag[i];
612         v1  = av + adiag[i];
613         for (j = 0; j < rnz; j++) {
614           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
615           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
616           val[nz++] = v1[j];
617         }
618       }
619     }
620   } else {
621     nz  = 0;
622     val = mumps->val;
623     if (missing) {
624       for (i = 0; i < M; i++) {
625         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
626           for (j = ai[i]; j < ai[i + 1]; j++) {
627             if (aj[j] < i) continue;
628             val[nz++] = av[j];
629           }
630         } else {
631           rnz = ai[i + 1] - adiag[i];
632           v1  = av + adiag[i];
633           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
634         }
635       }
636     } else {
637       for (i = 0; i < M; i++) {
638         rnz = ai[i + 1] - adiag[i];
639         v1  = av + adiag[i];
640         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
641       }
642     }
643   }
644   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
649 {
650   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
651   PetscInt           bs;
652   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
653   PetscMUMPSInt     *row, *col;
654   const PetscScalar *av, *bv, *v1, *v2;
655   PetscScalar       *val;
656   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
657   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
658   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
659   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
660 #if defined(PETSC_USE_COMPLEX)
661   PetscBool hermitian, isset;
662 #endif
663 
664   PetscFunctionBegin;
665 #if defined(PETSC_USE_COMPLEX)
666   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
667   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
668 #endif
669   PetscCall(MatGetBlockSize(A, &bs));
670   rstart = A->rmap->rstart;
671   ai     = aa->i;
672   aj     = aa->j;
673   bi     = bb->i;
674   bj     = bb->j;
675   av     = aa->a;
676   bv     = bb->a;
677 
678   garray = mat->garray;
679 
680   if (reuse == MAT_INITIAL_MATRIX) {
681     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
682     PetscCall(PetscMalloc2(nz, &row, nz, &col));
683     PetscCall(PetscMalloc1(nz, &val));
684     /* can not decide the exact mumps->nnz now because of the SBAIJ */
685     mumps->irn = row;
686     mumps->jcn = col;
687     mumps->val = mumps->val_alloc = val;
688   } else {
689     val = mumps->val;
690   }
691 
692   jj   = 0;
693   irow = rstart;
694   for (i = 0; i < mbs; i++) {
695     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
696     countA = ai[i + 1] - ai[i];
697     countB = bi[i + 1] - bi[i];
698     bjj    = bj + bi[i];
699     v1     = av + ai[i] * bs2;
700     v2     = bv + bi[i] * bs2;
701 
702     if (bs > 1) {
703       /* A-part */
704       for (j = 0; j < countA; j++) {
705         for (k = 0; k < bs; k++) {
706           for (m = 0; m < bs; m++) {
707             if (rstart + ajj[j] * bs > irow || k >= m) {
708               if (reuse == MAT_INITIAL_MATRIX) {
709                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
710                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
711               }
712               val[jj++] = v1[j * bs2 + m + k * bs];
713             }
714           }
715         }
716       }
717 
718       /* B-part */
719       for (j = 0; j < countB; j++) {
720         for (k = 0; k < bs; k++) {
721           for (m = 0; m < bs; m++) {
722             if (reuse == MAT_INITIAL_MATRIX) {
723               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
724               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
725             }
726             val[jj++] = v2[j * bs2 + m + k * bs];
727           }
728         }
729       }
730     } else {
731       /* A-part */
732       for (j = 0; j < countA; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
735           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
736         }
737         val[jj++] = v1[j];
738       }
739 
740       /* B-part */
741       for (j = 0; j < countB; j++) {
742         if (reuse == MAT_INITIAL_MATRIX) {
743           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
744           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
745         }
746         val[jj++] = v2[j];
747       }
748     }
749     irow += bs;
750   }
751   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
756 {
757   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
758   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
759   PetscMUMPSInt     *row, *col;
760   const PetscScalar *av, *bv, *v1, *v2;
761   PetscScalar       *val;
762   Mat                Ad, Ao;
763   Mat_SeqAIJ        *aa;
764   Mat_SeqAIJ        *bb;
765 
766   PetscFunctionBegin;
767   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
768   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
769   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
770 
771   aa = (Mat_SeqAIJ *)(Ad)->data;
772   bb = (Mat_SeqAIJ *)(Ao)->data;
773   ai = aa->i;
774   aj = aa->j;
775   bi = bb->i;
776   bj = bb->j;
777 
778   rstart = A->rmap->rstart;
779   cstart = A->cmap->rstart;
780 
781   if (reuse == MAT_INITIAL_MATRIX) {
782     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
783     PetscCall(PetscMalloc2(nz, &row, nz, &col));
784     PetscCall(PetscMalloc1(nz, &val));
785     mumps->nnz = nz;
786     mumps->irn = row;
787     mumps->jcn = col;
788     mumps->val = mumps->val_alloc = val;
789   } else {
790     val = mumps->val;
791   }
792 
793   jj   = 0;
794   irow = rstart;
795   for (i = 0; i < m; i++) {
796     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
797     countA = ai[i + 1] - ai[i];
798     countB = bi[i + 1] - bi[i];
799     bjj    = bj + bi[i];
800     v1     = av + ai[i];
801     v2     = bv + bi[i];
802 
803     /* A-part */
804     for (j = 0; j < countA; j++) {
805       if (reuse == MAT_INITIAL_MATRIX) {
806         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
807         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
808       }
809       val[jj++] = v1[j];
810     }
811 
812     /* B-part */
813     for (j = 0; j < countB; j++) {
814       if (reuse == MAT_INITIAL_MATRIX) {
815         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
816         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
817       }
818       val[jj++] = v2[j];
819     }
820     irow++;
821   }
822   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
823   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
828 {
829   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
830   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
831   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
832   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
833   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
834   const PetscInt     bs2 = mat->bs2;
835   PetscInt           bs;
836   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
837   PetscMUMPSInt     *row, *col;
838   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
839   PetscScalar       *val;
840 
841   PetscFunctionBegin;
842   PetscCall(MatGetBlockSize(A, &bs));
843   if (reuse == MAT_INITIAL_MATRIX) {
844     nz = bs2 * (aa->nz + bb->nz);
845     PetscCall(PetscMalloc2(nz, &row, nz, &col));
846     PetscCall(PetscMalloc1(nz, &val));
847     mumps->nnz = nz;
848     mumps->irn = row;
849     mumps->jcn = col;
850     mumps->val = mumps->val_alloc = val;
851   } else {
852     val = mumps->val;
853   }
854 
855   jj   = 0;
856   irow = rstart;
857   for (i = 0; i < mbs; i++) {
858     countA = ai[i + 1] - ai[i];
859     countB = bi[i + 1] - bi[i];
860     ajj    = aj + ai[i];
861     bjj    = bj + bi[i];
862     v1     = av + bs2 * ai[i];
863     v2     = bv + bs2 * bi[i];
864 
865     idx = 0;
866     /* A-part */
867     for (k = 0; k < countA; k++) {
868       for (j = 0; j < bs; j++) {
869         for (n = 0; n < bs; n++) {
870           if (reuse == MAT_INITIAL_MATRIX) {
871             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
872             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
873           }
874           val[jj++] = v1[idx++];
875         }
876       }
877     }
878 
879     idx = 0;
880     /* B-part */
881     for (k = 0; k < countB; k++) {
882       for (j = 0; j < bs; j++) {
883         for (n = 0; n < bs; n++) {
884           if (reuse == MAT_INITIAL_MATRIX) {
885             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
886             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
887           }
888           val[jj++] = v2[idx++];
889         }
890       }
891     }
892     irow += bs;
893   }
894   PetscFunctionReturn(PETSC_SUCCESS);
895 }
896 
897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
898 {
899   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
900   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
901   PetscMUMPSInt     *row, *col;
902   const PetscScalar *av, *bv, *v1, *v2;
903   PetscScalar       *val;
904   Mat                Ad, Ao;
905   Mat_SeqAIJ        *aa;
906   Mat_SeqAIJ        *bb;
907 #if defined(PETSC_USE_COMPLEX)
908   PetscBool hermitian, isset;
909 #endif
910 
911   PetscFunctionBegin;
912 #if defined(PETSC_USE_COMPLEX)
913   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
914   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
915 #endif
916   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
917   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
918   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
919 
920   aa    = (Mat_SeqAIJ *)(Ad)->data;
921   bb    = (Mat_SeqAIJ *)(Ao)->data;
922   ai    = aa->i;
923   aj    = aa->j;
924   adiag = aa->diag;
925   bi    = bb->i;
926   bj    = bb->j;
927 
928   rstart = A->rmap->rstart;
929 
930   if (reuse == MAT_INITIAL_MATRIX) {
931     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
932     nzb = 0; /* num of upper triangular entries in mat->B */
933     for (i = 0; i < m; i++) {
934       nza += (ai[i + 1] - adiag[i]);
935       countB = bi[i + 1] - bi[i];
936       bjj    = bj + bi[i];
937       for (j = 0; j < countB; j++) {
938         if (garray[bjj[j]] > rstart) nzb++;
939       }
940     }
941 
942     nz = nza + nzb; /* total nz of upper triangular part of mat */
943     PetscCall(PetscMalloc2(nz, &row, nz, &col));
944     PetscCall(PetscMalloc1(nz, &val));
945     mumps->nnz = nz;
946     mumps->irn = row;
947     mumps->jcn = col;
948     mumps->val = mumps->val_alloc = val;
949   } else {
950     val = mumps->val;
951   }
952 
953   jj   = 0;
954   irow = rstart;
955   for (i = 0; i < m; i++) {
956     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
957     v1     = av + adiag[i];
958     countA = ai[i + 1] - adiag[i];
959     countB = bi[i + 1] - bi[i];
960     bjj    = bj + bi[i];
961     v2     = bv + bi[i];
962 
963     /* A-part */
964     for (j = 0; j < countA; j++) {
965       if (reuse == MAT_INITIAL_MATRIX) {
966         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
967         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
968       }
969       val[jj++] = v1[j];
970     }
971 
972     /* B-part */
973     for (j = 0; j < countB; j++) {
974       if (garray[bjj[j]] > rstart) {
975         if (reuse == MAT_INITIAL_MATRIX) {
976           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
977           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
978         }
979         val[jj++] = v2[j];
980       }
981     }
982     irow++;
983   }
984   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
985   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
990 {
991   const PetscScalar *av;
992   const PetscInt     M = A->rmap->n;
993   PetscInt64         i;
994   PetscMUMPSInt     *row, *col;
995   Vec                v;
996 
997   PetscFunctionBegin;
998   PetscCall(MatDiagonalGetDiagonal(A, &v));
999   PetscCall(VecGetArrayRead(v, &av));
1000   if (reuse == MAT_INITIAL_MATRIX) {
1001     PetscCall(PetscMalloc2(M, &row, M, &col));
1002     for (i = 0; i < M; i++) {
1003       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1004       col[i] = row[i];
1005     }
1006     mumps->val = (PetscScalar *)av;
1007     mumps->irn = row;
1008     mumps->jcn = col;
1009     mumps->nnz = M;
1010   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1011   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1012   PetscCall(VecRestoreArrayRead(v, &av));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1017 {
1018   PetscScalar   *v;
1019   const PetscInt m = A->rmap->n, N = A->cmap->N;
1020   PetscInt       lda;
1021   PetscInt64     i, j;
1022   PetscMUMPSInt *row, *col;
1023 
1024   PetscFunctionBegin;
1025   PetscCall(MatDenseGetArray(A, &v));
1026   PetscCall(MatDenseGetLDA(A, &lda));
1027   if (reuse == MAT_INITIAL_MATRIX) {
1028     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1029     for (i = 0; i < m; i++) {
1030       col[i] = 0;
1031       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1032     }
1033     for (j = 1; j < N; j++) {
1034       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1035       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1036     }
1037     if (lda == m) mumps->val = v;
1038     else {
1039       PetscCall(PetscMalloc1(m * N, &mumps->val));
1040       mumps->val_alloc = mumps->val;
1041       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1042     }
1043     mumps->irn = row;
1044     mumps->jcn = col;
1045     mumps->nnz = m * N;
1046   } else {
1047     if (lda == m && !mumps->nest_vals) mumps->val = v;
1048     else {
1049       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1050     }
1051   }
1052   PetscCall(MatDenseRestoreArray(A, &v));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
/*
  Convert a MATNEST matrix to MUMPS COO (triples) format.

  Each nonempty block is converted with its per-type MatConvertToTriples_*() routine into a
  scratch COO, whose indices are then remapped to the global (nested) ordering: rows directly
  through the nest row IS, columns through a PetscSF since a block's columns may be owned by
  other processes. For Cholesky (mumps->sym != 0) only blocks on or above the diagonal are
  kept. On MAT_REUSE_MATRIX only the values are refreshed, each block writing into its slice
  of mumps->nest_vals at the offsets recorded in mumps->nest_vals_start.
*/
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric factorization: keep only the upper triangle */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscInt64     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* First pass: select a conversion routine per block and estimate the total nonzero count */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag, isDense;
          MatInfo   info;

          /* for (Hermitian-)transpose wrappers, classify and convert the inner matrix */
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
          if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
          else {
            PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
            if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
          }
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            /* diagonal blocks contribute only their upper triangle; off-diagonal blocks are taken whole */
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); /* widened scratch copy of jcn, reused for every block */
    else (void)maxnnz;

    /* Second pass: convert each block and remap its indices into the global ordering */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscInt        rst;
        PetscSF         csf;
        PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
        PetscLayout     cmap;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        swap = (PetscBool)(isTrans || isHTrans);

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
        if (isHTrans) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
        }

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* Values-only refresh: re-run each block's conversion in MAT_REUSE_MATRIX mode,
       pointing mumps->val at that block's slice of the previously assembled buffer */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool isTrans, isHTrans = PETSC_FALSE;
        Mat       sub  = mats[r][c];
        PetscInt  midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
        if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
        else {
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
          if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
        }
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (isHTrans) { /* conjugate the refreshed slice in place */
          PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
        }
      }
    }
    mumps->val = oval; /* restore the pointer to the full buffer */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1271 
/* Destroy the MUMPS factor matrix: release all PETSc-side buffers, terminate the MUMPS
   instance (JOB_END) if it was ever initialized, release the MUMPS communicator, and
   detach the composed MatMumps*()/MatFactor*() query functions. */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated (PETSc OpenMP support path) or borrowed
         from the PETSc inner-communicator cache; release it by the matching mechanism */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1331 
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   When PETSc manages OpenMP for MUMPS (omp_comm_size > 1), the RHS pieces of all ranks in an omp_comm are gathered onto the
   omp master rank, which is the only rank that talks to MUMPS; otherwise each rank hands its local rows to MUMPS directly. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OpenMP gathering: the local rows go straight to MUMPS */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar *)array;
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        /* translate omp_comm ranks to petsc_comm ranks so we can look up each member's row range */
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        /* the gather concatenates per-rank column-blocks; interleave them so each RHS column is contiguous over nloc_rhs rows */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  /* hand the distributed-RHS description to MUMPS (used with ICNTL(20)=10) */
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1429 
/*
  MatSolve_MUMPS - Solve A x = b using the MUMPS factors stored in A->data (MUMPS JOB=3).

  The right-hand side is passed to MUMPS either as a dense distributed RHS
  (ICNTL(20)=10, when mumps->ICNTL20 was configured to 10) or centralized on the
  host rank (ICNTL(20)=0).  Handles the optional Schur-complement condensation/
  expansion steps driven by ICNTL(26), and in parallel scatters MUMPS' distributed
  solution (isol_loc/sol_loc) back into the PETSc layout of x.
*/
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* A factorization recorded as failed cannot be solved with; mark x as invalid and return */
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1; /* single right-hand side */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b, &rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) { /* only the host rank hands the centralized RHS to MUMPS */
        PetscCall(VecGetArray(mumps->b_seq, &array));
        mumps->id.rhs = (MumpsScalar *)array;
      }
    }
  } else {                   /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS solves in place in id.rhs, so copy b into x and solve there */
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    mumps->id.rhs = (MumpsScalar *)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE; /* remember that an expansion solve is needed afterwards */
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
      mumps->id.ICNTL(26) = 1; /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }
  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) {
    /* user requested condensation only: solve with the Schur factors and copy the
       reduced solution back into the interface entries of x (Fortran 1-based indices) */
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
      PetscScalar val = mumps->id.redrhs[i];
#else
      PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
#endif
      array[mumps->id.listvar_schur[i] - 1] = val;
    }
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
  }

  /* release the array accesses taken above, mirroring the branch structure of the setup */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b, &rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else PetscCall(VecRestoreArray(x, &array));

  /* INFO(28) is the number of entries in the factors used by the solve; negative values are in millions */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1542 
1543 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1544 {
1545   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1546   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1547 
1548   PetscFunctionBegin;
1549   mumps->id.ICNTL(9) = 0;
1550   PetscCall(MatSolve_MUMPS(A, b, x));
1551   mumps->id.ICNTL(9) = value;
1552   PetscFunctionReturn(PETSC_SUCCESS);
1553 }
1554 
/*
  MatMatSolve_MUMPS - Solve A X = B for a dense X (MUMPS JOB=3 with multiple RHS).

  B is either dense (MATSEQDENSE/MATMPIDENSE, ICNTL(20)=0 or 10) or a
  MATTRANSPOSEVIRTUAL wrapper of an AIJ matrix, in which case the wrapped matrix
  holds the RHS in the sparse compressed-column layout MUMPS requires
  (ICNTL(20)=1); see MatMatTransposeSolve_MUMPS().

  Sequential path: solves in place inside X's array (with optional Schur
  condensation/expansion).  Parallel path: centralizes/distributes B as needed,
  then scatters MUMPS' distributed solution (sol_loc/isol_loc) into X; the
  id.sol_loc/id.isol_loc buffers used by MatSolve() are saved and restored.
*/
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *bray, *sol_loc, *sol_loc_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa; /* NOTE: aa/spnr/ia/ja are intentionally re-declared (shadowed) in the sequential branch below */
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else {                   /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    if (flgT) { /* input B is transpose of actual RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      PetscCall(MatTransposeGetMat(B, &Bt));
    } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M; /* leading dimension of the (column-major) dense RHS */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    mumps->id.rhs = (MumpsScalar *)array;

    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(PetscArraycpy(array, rbray, M * nrhs));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
    } else { /* sparse B */
      /* hand Bt's CSR arrays (1-based) to MUMPS as the compressed-column sparse RHS */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
        mumps->id.ICNTL(26) = 1; /* condensation phase */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }
    /* solve phase */
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) {
      /* copy the reduced (Schur) solution of each RHS column back into X (1-based listvar_schur) */
      PetscCall(MatMumpsSolveSchur_Private(A));
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
#else
          PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
#endif
          array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
        }
    }
    if (!denseB) { /* sparse B */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &rbray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
      PetscCall(MatDenseRestoreArrayRead(B, &rbray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      /* v_mpi is only a layout placeholder here; its array is supplied later via VecPlaceArray() */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArray(B, &bray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
      PetscCall(MatDenseRestoreArray(B, &bray));

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhs * M, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        k = 0;
        for (proc = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }
      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArray(b_seq, &bray));
        mumps->id.rhs = (MumpsScalar *)bray;
        PetscCall(VecRestoreArray(b_seq, &bray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArray(X, &bray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
    PetscCall(MatDenseRestoreArray(X, &bray));

    if (!mumps->myid) {
      /* sparse RHS must be centralized: only the host passes the diagonal block's CSR (1-based) to MUMPS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      mumps->id.rhs_sparse = (MumpsScalar *)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to petsc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate which rank owns global row isol_loc[i] of X to compute its position in v_mpi's blocked layout */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save; /* restore the buffers MatSolve() uses */
  mumps->id.isol_loc = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) { /* b_seq/scat_rhs exist only in the centralized dense-RHS path */
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1812 
1813 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1814 {
1815   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1816   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1817 
1818   PetscFunctionBegin;
1819   mumps->id.ICNTL(9) = 0;
1820   PetscCall(MatMatSolve_MUMPS(A, B, X));
1821   mumps->id.ICNTL(9) = value;
1822   PetscFunctionReturn(PETSC_SUCCESS);
1823 }
1824 
1825 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1826 {
1827   PetscBool flg;
1828   Mat       B;
1829 
1830   PetscFunctionBegin;
1831   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1832   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1833 
1834   /* Create B=Bt^T that uses Bt's data structure */
1835   PetscCall(MatCreateTranspose(Bt, &B));
1836 
1837   PetscCall(MatMatSolve_MUMPS(A, B, X));
1838   PetscCall(MatDestroy(&B));
1839   PetscFunctionReturn(PETSC_SUCCESS);
1840 }
1841 
1842 #if !defined(PETSC_USE_COMPLEX)
1843 /*
1844   input:
1845    F:        numeric factor
1846   output:
1847    nneg:     total number of negative pivots
1848    nzero:    total number of zero pivots
1849    npos:     (global dimension of F) - nneg - nzero
1850 */
1851 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1852 {
1853   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1854   PetscMPIInt size;
1855 
1856   PetscFunctionBegin;
1857   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1858   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1859   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1860 
1861   if (nneg) *nneg = mumps->id.INFOG(12);
1862   if (nzero || npos) {
1863     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1864     if (nzero) *nzero = mumps->id.INFOG(28);
1865     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1866   }
1867   PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869 #endif
1870 
/*
  MatMumpsGatherNonzerosOnMaster - Gather the COO triples (irn/jcn/val) of all
  ranks in an omp_comm onto that communicator's master rank.

  With MAT_INITIAL_MATRIX the master learns every rank's nonzero count, allocates
  enlarged irn/jcn/val arrays, and receives both the structure and the values;
  with MAT_REUSE_MATRIX only the values are re-gathered into the already-placed
  slots.  Because nnz counts are int64_t and MPI counts are capped at 2^31-1,
  each message is sliced into chunks of at most PETSC_MPI_INT_MAX entries.
  No-op when the omp_comm has a single rank.
*/
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscInt       i, nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (i = 1; i < osize; i++) {
          /* slice rank i's contribution into <= PETSC_MPI_INT_MAX chunks */
          count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master ranks send their whole triples, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: structure is already in place on the master, only values move */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1988 
1989 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
1990 {
1991   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1992   PetscBool  isMPIAIJ;
1993 
1994   PetscFunctionBegin;
1995   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1996     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1997     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1998     PetscFunctionReturn(PETSC_SUCCESS);
1999   }
2000 
2001   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2002   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2003 
2004   /* numerical factorization phase */
2005   mumps->id.job = JOB_FACTNUMERIC;
2006   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2007     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
2008   } else {
2009     mumps->id.a_loc = (MumpsScalar *)mumps->val;
2010   }
2011   PetscMUMPS_c(mumps);
2012   if (mumps->id.INFOG(1) < 0) {
2013     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2014     if (mumps->id.INFOG(1) == -10) {
2015       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2016       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2017     } else if (mumps->id.INFOG(1) == -13) {
2018       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2019       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2020     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2021       PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2022       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2023     } else {
2024       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2025       F->factorerrortype = MAT_FACTOR_OTHER;
2026     }
2027   }
2028   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2029 
2030   F->assembled = PETSC_TRUE;
2031 
2032   if (F->schur) { /* reset Schur status to unfactored */
2033 #if defined(PETSC_HAVE_CUDA)
2034     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2035 #endif
2036     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2037       mumps->id.ICNTL(19) = 2;
2038       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2039     }
2040     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2041   }
2042 
2043   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2044   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2045 
2046   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2047   if (mumps->petsc_size > 1) {
2048     PetscInt     lsol_loc;
2049     PetscScalar *sol_loc;
2050 
2051     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2052 
2053     /* distributed solution; Create x_seq=sol_loc for repeated use */
2054     if (mumps->x_seq) {
2055       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2056       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2057       PetscCall(VecDestroy(&mumps->x_seq));
2058     }
2059     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2060     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2061     mumps->id.lsol_loc = lsol_loc;
2062     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2063     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2064   }
2065   PetscCall(PetscLogFlops((double)mumps->id.RINFO(2)));
2066   PetscFunctionReturn(PETSC_SUCCESS);
2067 }
2068 
2069 /* Sets MUMPS options from the options database */
2070 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2071 {
2072   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2073   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2074   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2075   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2076   MumpsScalar  *arr;
2077 
2078   PetscFunctionBegin;
2079   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2080   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2081     PetscInt nthreads   = 0;
2082     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2083     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2084 
2085     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2086     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2087     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2088 
2089     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2090     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2091     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2092     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2093     if (mumps->use_petsc_omp_support) {
2094       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2095                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2096       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2097 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2098       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2099       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2100 #endif
2101     } else {
2102       mumps->omp_comm      = PETSC_COMM_SELF;
2103       mumps->mumps_comm    = mumps->petsc_comm;
2104       mumps->is_omp_master = PETSC_TRUE;
2105     }
2106     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2107     mumps->reqs = NULL;
2108     mumps->tag  = 0;
2109 
2110     if (mumps->mumps_comm != MPI_COMM_NULL) {
2111       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2112         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2113         MPI_Comm comm;
2114         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2115         mumps->mumps_comm = comm;
2116       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2117     }
2118 
2119     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2120     mumps->id.job          = JOB_INIT;
2121     mumps->id.par          = 1; /* host participates factorizaton and solve */
2122     mumps->id.sym          = mumps->sym;
2123 
2124     size          = mumps->id.size_schur;
2125     arr           = mumps->id.schur;
2126     listvar_schur = mumps->id.listvar_schur;
2127     PetscMUMPS_c(mumps);
2128     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2129 
2130     /* set PETSc-MUMPS default options - override MUMPS default */
2131     mumps->id.ICNTL(3) = 0;
2132     mumps->id.ICNTL(4) = 0;
2133     if (mumps->petsc_size == 1) {
2134       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2135       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2136     } else {
2137       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2138       mumps->id.ICNTL(21) = 1; /* distributed solution */
2139     }
2140 
2141     /* restore cached ICNTL and CNTL values */
2142     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2143     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2144     PetscCall(PetscFree(mumps->ICNTL_pre));
2145     PetscCall(PetscFree(mumps->CNTL_pre));
2146 
2147     if (schur) {
2148       mumps->id.size_schur    = size;
2149       mumps->id.schur_lld     = size;
2150       mumps->id.schur         = arr;
2151       mumps->id.listvar_schur = listvar_schur;
2152       if (mumps->petsc_size > 1) {
2153         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2154 
2155         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2156         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2157         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2158         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2159       } else {
2160         if (F->factortype == MAT_FACTOR_LU) {
2161           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2162         } else {
2163           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2164         }
2165       }
2166       mumps->id.ICNTL(26) = -1;
2167     }
2168 
2169     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2170        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2171      */
2172     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2173     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2174 
2175     mumps->scat_rhs = NULL;
2176     mumps->scat_sol = NULL;
2177   }
2178   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2179   if (flg) mumps->id.ICNTL(1) = icntl;
2180   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2181   if (flg) mumps->id.ICNTL(2) = icntl;
2182   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2183   if (flg) mumps->id.ICNTL(3) = icntl;
2184 
2185   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2186   if (flg) mumps->id.ICNTL(4) = icntl;
2187   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2188 
2189   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2190   if (flg) mumps->id.ICNTL(6) = icntl;
2191 
2192   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2193   if (flg) {
2194     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2195     mumps->id.ICNTL(7) = icntl;
2196   }
2197 
2198   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2199   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2200   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2201   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2202   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2203   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2204   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2205   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2206   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2207   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2208   if (flg) {
2209     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2210     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2211   }
2212   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2213   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2214     PetscCall(MatDestroy(&F->schur));
2215     PetscCall(MatMumpsResetSchur_Private(mumps));
2216   }
2217 
2218   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2219      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2220      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2221      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2222      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2223      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2224    */
2225 #if PETSC_PKG_MUMPS_VERSION_GE(5, 6, 2) && defined(PETSC_HAVE_MUMPS_AVOID_MPI_IN_PLACE)
2226   mumps->ICNTL20 = 10;
2227 #elif PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2228   mumps->ICNTL20     = 0; /* Centralized dense RHS*/
2229 #else
2230   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
2231 #endif
2232   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2233   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2234 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2235   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2236 #endif
2237   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2238 
2239   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2240   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2241   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2242   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2243 
2244   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2245   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2246   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2247   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2248   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2249   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2250   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2251   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2252   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2253   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2254   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2255   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2256   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2257 
2258   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2259   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2260   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2261   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2262   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2263   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2264 
2265   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2266 
2267   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2268   if (ninfo) {
2269     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2270     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2271     mumps->ninfo = ninfo;
2272     for (i = 0; i < ninfo; i++) {
2273       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2274       mumps->info[i] = info[i];
2275     }
2276   }
2277   PetscOptionsEnd();
2278   PetscFunctionReturn(PETSC_SUCCESS);
2279 }
2280 
2281 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2282 {
2283   PetscFunctionBegin;
2284   if (mumps->id.INFOG(1) < 0) {
2285     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2286     if (mumps->id.INFOG(1) == -6) {
2287       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2288       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2289     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2290       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2291       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2292     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
2293       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2294     } else {
2295       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2296       F->factorerrortype = MAT_FACTOR_OTHER;
2297     }
2298   }
2299   PetscFunctionReturn(PETSC_SUCCESS);
2300 }
2301 
2302 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2303 {
2304   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2305   Vec            b;
2306   const PetscInt M = A->rmap->N;
2307 
2308   PetscFunctionBegin;
2309   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2310     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2311     PetscFunctionReturn(PETSC_SUCCESS);
2312   }
2313 
2314   /* Set MUMPS options from the options database */
2315   PetscCall(MatSetFromOptions_MUMPS(F, A));
2316 
2317   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2318   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2319 
2320   /* analysis phase */
2321   mumps->id.job = JOB_FACTSYMBOLIC;
2322   mumps->id.n   = M;
2323   switch (mumps->id.ICNTL(18)) {
2324   case 0: /* centralized assembled matrix input */
2325     if (!mumps->myid) {
2326       mumps->id.nnz = mumps->nnz;
2327       mumps->id.irn = mumps->irn;
2328       mumps->id.jcn = mumps->jcn;
2329       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2330       if (r && mumps->id.ICNTL(7) == 7) {
2331         mumps->id.ICNTL(7) = 1;
2332         if (!mumps->myid) {
2333           const PetscInt *idx;
2334           PetscInt        i;
2335 
2336           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2337           PetscCall(ISGetIndices(r, &idx));
2338           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2339           PetscCall(ISRestoreIndices(r, &idx));
2340         }
2341       }
2342     }
2343     break;
2344   case 3: /* distributed assembled matrix input (size>1) */
2345     mumps->id.nnz_loc = mumps->nnz;
2346     mumps->id.irn_loc = mumps->irn;
2347     mumps->id.jcn_loc = mumps->jcn;
2348     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2349     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2350       PetscCall(MatCreateVecs(A, NULL, &b));
2351       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2352       PetscCall(VecDestroy(&b));
2353     }
2354     break;
2355   }
2356   PetscMUMPS_c(mumps);
2357   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2358 
2359   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2360   F->ops->solve             = MatSolve_MUMPS;
2361   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2362   F->ops->matsolve          = MatMatSolve_MUMPS;
2363   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2364   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2365 
2366   mumps->matstruc = SAME_NONZERO_PATTERN;
2367   PetscFunctionReturn(PETSC_SUCCESS);
2368 }
2369 
2370 /* Note the Petsc r and c permutations are ignored */
2371 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2372 {
2373   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2374   Vec            b;
2375   const PetscInt M = A->rmap->N;
2376 
2377   PetscFunctionBegin;
2378   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2379     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2380     PetscFunctionReturn(PETSC_SUCCESS);
2381   }
2382 
2383   /* Set MUMPS options from the options database */
2384   PetscCall(MatSetFromOptions_MUMPS(F, A));
2385 
2386   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2387   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2388 
2389   /* analysis phase */
2390   mumps->id.job = JOB_FACTSYMBOLIC;
2391   mumps->id.n   = M;
2392   switch (mumps->id.ICNTL(18)) {
2393   case 0: /* centralized assembled matrix input */
2394     if (!mumps->myid) {
2395       mumps->id.nnz = mumps->nnz;
2396       mumps->id.irn = mumps->irn;
2397       mumps->id.jcn = mumps->jcn;
2398       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2399     }
2400     break;
2401   case 3: /* distributed assembled matrix input (size>1) */
2402     mumps->id.nnz_loc = mumps->nnz;
2403     mumps->id.irn_loc = mumps->irn;
2404     mumps->id.jcn_loc = mumps->jcn;
2405     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2406     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2407       PetscCall(MatCreateVecs(A, NULL, &b));
2408       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2409       PetscCall(VecDestroy(&b));
2410     }
2411     break;
2412   }
2413   PetscMUMPS_c(mumps);
2414   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2415 
2416   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2417   F->ops->solve             = MatSolve_MUMPS;
2418   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2419   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2420 
2421   mumps->matstruc = SAME_NONZERO_PATTERN;
2422   PetscFunctionReturn(PETSC_SUCCESS);
2423 }
2424 
/* Note the Petsc r permutation and factor info are ignored */
/* Symbolic (analysis) phase of the MUMPS Cholesky/LDL^T factorization.
   Converts A to MUMPS triple format, runs the MUMPS analysis (JOB_FACTSYMBOLIC),
   and installs the numeric-phase and solve callbacks on the factor matrix F.
   When the nonzero pattern is unchanged, the previous analysis in F is reused. */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N; /* global number of rows of A */

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  /* build the (irn, jcn, val) triples MUMPS consumes */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input */
    if (!mumps->myid) { /* only the host hands the matrix to MUMPS */
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* values are only needed at analysis when scaling/permutation uses them (ICNTL(6)>1) */
      if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
    }
    break;
  case 3: /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps);
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  /* symmetric factor: solvetranspose is the same as solve */
  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  /* inertia is only defined for real symmetric (Hermitian handled elsewhere) */
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2486 
2487 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2488 {
2489   PetscBool         iascii;
2490   PetscViewerFormat format;
2491   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2492 
2493   PetscFunctionBegin;
2494   /* check if matrix is mumps type */
2495   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2496 
2497   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2498   if (iascii) {
2499     PetscCall(PetscViewerGetFormat(viewer, &format));
2500     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2501       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2502       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2503         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2504         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2505         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2506         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2507         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2508         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2509         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2510         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2511         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2512         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2513         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2514         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2515         if (mumps->id.ICNTL(11) > 0) {
2516           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)mumps->id.RINFOG(4)));
2517           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)mumps->id.RINFOG(5)));
2518           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)mumps->id.RINFOG(6)));
2519           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)mumps->id.RINFOG(7), (double)mumps->id.RINFOG(8)));
2520           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)mumps->id.RINFOG(9)));
2521           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)mumps->id.RINFOG(10), (double)mumps->id.RINFOG(11)));
2522         }
2523         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2524         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2525         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2526         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2527         /* ICNTL(15-17) not used */
2528         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2529         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2530         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2532         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2533         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2534 
2535         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2536         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2537         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2538         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2539         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2540         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2541 
2542         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2543         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2544         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2545         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2546         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2549 
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)mumps->id.CNTL(1)));
2551         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)mumps->id.CNTL(2)));
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)mumps->id.CNTL(3)));
2553         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)mumps->id.CNTL(4)));
2554         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)mumps->id.CNTL(5)));
2555         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)mumps->id.CNTL(7)));
2556 
2557         /* information local to each processor */
2558         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2559         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2560         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(1)));
2561         PetscCall(PetscViewerFlush(viewer));
2562         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2563         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(2)));
2564         PetscCall(PetscViewerFlush(viewer));
2565         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2566         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)mumps->id.RINFO(3)));
2567         PetscCall(PetscViewerFlush(viewer));
2568 
2569         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2570         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2571         PetscCall(PetscViewerFlush(viewer));
2572 
2573         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2574         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2575         PetscCall(PetscViewerFlush(viewer));
2576 
2577         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2578         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2579         PetscCall(PetscViewerFlush(viewer));
2580 
2581         if (mumps->ninfo && mumps->ninfo <= 80) {
2582           PetscInt i;
2583           for (i = 0; i < mumps->ninfo; i++) {
2584             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2585             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2586             PetscCall(PetscViewerFlush(viewer));
2587           }
2588         }
2589         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2590       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2591 
2592       if (mumps->myid == 0) { /* information from the host */
2593         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)mumps->id.RINFOG(1)));
2594         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)mumps->id.RINFOG(2)));
2595         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)mumps->id.RINFOG(3)));
2596         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)mumps->id.RINFOG(12), (double)mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2597 
2598         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2599         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2600         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2601         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2602         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2603         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2604         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2605         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2606         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2607         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2608         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2609         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2610         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2611         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2612         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2613         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2614         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2615         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2616         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2617         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2618         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2619         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2620         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2621         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2622         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2623         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2624         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2625         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2626         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2627         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2628         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2629         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2630         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2631         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2632       }
2633     }
2634   }
2635   PetscFunctionReturn(PETSC_SUCCESS);
2636 }
2637 
2638 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
2639 {
2640   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2641 
2642   PetscFunctionBegin;
2643   info->block_size        = 1.0;
2644   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2645   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2646   info->nz_unneeded       = 0.0;
2647   info->assemblies        = 0.0;
2648   info->mallocs           = 0.0;
2649   info->memory            = 0.0;
2650   info->fill_ratio_given  = 0;
2651   info->fill_ratio_needed = 0;
2652   info->factor_mallocs    = 0;
2653   PetscFunctionReturn(PETSC_SUCCESS);
2654 }
2655 
/* Select the Schur complement variables for the factorization: the entries of
   `is` (0-based, PETSc convention) name the rows/columns of A whose Schur
   complement MUMPS should compute. Allocates the dense matrix F->schur whose
   storage MUMPS fills in during factorization. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix: a size x size sequential dense matrix whose raw
     array is handed to MUMPS (id.schur) to be filled during factorization */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  mumps->id.schur      = (MumpsScalar *)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension: the array is exactly size x size, no padding */
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* sym == 1 is MUMPS's SPD mode */

  /* MUMPS expects Fortran style indices, hence the +1 shift below */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2685 
/* Copy the Schur complement computed by MUMPS (stored in mumps->id.schur, in a
   layout selected by ICNTL(19)) into a new column-major sequential dense
   matrix *S owned by the caller. For symmetric factorizations MUMPS may
   return only the lower triangle, which is mirrored here into a full matrix. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix in the unsymmetric case */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose while copying into column-major `array` */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
/* in complex builds MumpsScalar is a (r, i) struct, so rebuild a PetscScalar */
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[j * N + i] = val;
        }
      }
    } else { /* stored by columns: layouts match, straight copy */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    }
  } else {                          /* symmetric case: either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix: straight copy */
      PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows: mirror into both triangles */
      PetscInt i, j, N = mumps->id.size_schur;
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i * N + j];
#else
          PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
#endif
          array[i * N + j] = array[j * N + i] = val;
        }
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St; /* ownership of St transfers to the caller */
  PetscFunctionReturn(PETSC_SUCCESS);
}
2748 
/* Set ICNTL(icntl) = ival. Before MUMPS is initialized (id.job == JOB_NULL)
   the value cannot be written into id.ICNTL yet, so the pair is cached in
   mumps->ICNTL_pre and applied later. Cache layout: ICNTL_pre[0] holds the
   number of cached pairs, followed by (icntl, ival) pairs, i.e. pair i lives
   at slots [1+2i] and [2+2i]. Setting the same icntl twice overwrites the
   cached value in place. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached: grow the cache by one pair (plus the count slot on first use) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); /* range-checked narrowing to MUMPS's int type */
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); /* MUMPS is live: write directly */
  PetscFunctionReturn(PETSC_SUCCESS);
}
2768 
2769 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2770 {
2771   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2772 
2773   PetscFunctionBegin;
2774   if (mumps->id.job == JOB_NULL) {
2775     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2776     *ival = 0;
2777     for (i = 0; i < nICNTL_pre; ++i) {
2778       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2779     }
2780   } else *ival = mumps->id.ICNTL(icntl);
2781   PetscFunctionReturn(PETSC_SUCCESS);
2782 }
2783 
2784 /*@
2785   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2786 
2787   Logically Collective
2788 
2789   Input Parameters:
2790 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2791 . icntl - index of MUMPS parameter array ICNTL()
2792 - ival  - value of MUMPS ICNTL(icntl)
2793 
2794   Options Database Key:
2795 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2796 
2797   Level: beginner
2798 
2799 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2800 @*/
2801 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2802 {
2803   PetscFunctionBegin;
2804   PetscValidType(F, 1);
2805   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2806   PetscValidLogicalCollectiveInt(F, icntl, 2);
2807   PetscValidLogicalCollectiveInt(F, ival, 3);
2808   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2809   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2810   PetscFunctionReturn(PETSC_SUCCESS);
2811 }
2812 
2813 /*@
2814   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
2815 
2816   Logically Collective
2817 
2818   Input Parameters:
2819 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2820 - icntl - index of MUMPS parameter array ICNTL()
2821 
2822   Output Parameter:
2823 . ival - value of MUMPS ICNTL(icntl)
2824 
2825   Level: beginner
2826 
2827 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2828 @*/
2829 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2830 {
2831   PetscFunctionBegin;
2832   PetscValidType(F, 1);
2833   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2834   PetscValidLogicalCollectiveInt(F, icntl, 2);
2835   PetscAssertPointer(ival, 3);
2836   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2837   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2838   PetscFunctionReturn(PETSC_SUCCESS);
2839 }
2840 
/* Set CNTL(icntl) = val. Mirrors MatMumpsSetIcntl_MUMPS: before MUMPS is
   initialized the pair is cached in mumps->CNTL_pre (a PetscReal array whose
   slot 0 holds the pair count, followed by (icntl, val) pairs; the integer
   index is stored as a real and compared back against icntl). Once MUMPS is
   live the value is written directly into id.CNTL. */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; /* number of already cached CNTL */
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* is this CNTL already cached? */
    if (i == nCNTL_pre) {                             /* not already cached: grow the cache by one pair (plus the count slot on first use) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2860 
2861 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2862 {
2863   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2864 
2865   PetscFunctionBegin;
2866   if (mumps->id.job == JOB_NULL) {
2867     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2868     *val = 0.0;
2869     for (i = 0; i < nCNTL_pre; ++i) {
2870       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2871     }
2872   } else *val = mumps->id.CNTL(icntl);
2873   PetscFunctionReturn(PETSC_SUCCESS);
2874 }
2875 
2876 /*@
2877   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2878 
2879   Logically Collective
2880 
2881   Input Parameters:
2882 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2883 . icntl - index of MUMPS parameter array CNTL()
2884 - val   - value of MUMPS CNTL(icntl)
2885 
2886   Options Database Key:
2887 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2888 
2889   Level: beginner
2890 
2891 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2892 @*/
2893 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2894 {
2895   PetscFunctionBegin;
2896   PetscValidType(F, 1);
2897   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2898   PetscValidLogicalCollectiveInt(F, icntl, 2);
2899   PetscValidLogicalCollectiveReal(F, val, 3);
2900   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2901   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2902   PetscFunctionReturn(PETSC_SUCCESS);
2903 }
2904 
2905 /*@
2906   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
2907 
2908   Logically Collective
2909 
2910   Input Parameters:
2911 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2912 - icntl - index of MUMPS parameter array CNTL()
2913 
2914   Output Parameter:
2915 . val - value of MUMPS CNTL(icntl)
2916 
2917   Level: beginner
2918 
2919 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2920 @*/
2921 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2922 {
2923   PetscFunctionBegin;
2924   PetscValidType(F, 1);
2925   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2926   PetscValidLogicalCollectiveInt(F, icntl, 2);
2927   PetscAssertPointer(val, 3);
2928   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2929   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2930   PetscFunctionReturn(PETSC_SUCCESS);
2931 }
2932 
2933 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2934 {
2935   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2936 
2937   PetscFunctionBegin;
2938   *info = mumps->id.INFO(icntl);
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
2941 
2942 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2943 {
2944   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2945 
2946   PetscFunctionBegin;
2947   *infog = mumps->id.INFOG(icntl);
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2952 {
2953   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2954 
2955   PetscFunctionBegin;
2956   *rinfo = mumps->id.RINFO(icntl);
2957   PetscFunctionReturn(PETSC_SUCCESS);
2958 }
2959 
2960 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2961 {
2962   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2963 
2964   PetscFunctionBegin;
2965   *rinfog = mumps->id.RINFOG(icntl);
2966   PetscFunctionReturn(PETSC_SUCCESS);
2967 }
2968 
/* Return the null-pivot rows detected during factorization (requires
   ICNTL(24) == 1). Only rank 0 fills *size/*array; all other ranks return
   size 0 and a NULL array. Indices are converted from MUMPS's 1-based
   pivnul_list to PETSc's 0-based convention. The caller owns *array and must
   PetscFree() it. */
static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
  *size  = 0;
  *array = NULL;
  if (!mumps->myid) { /* pivnul_list is only meaningful on the host process */
    *size = mumps->id.INFOG(28); /* INFOG(28): number of null pivots found */
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; /* Fortran 1-based -> C 0-based */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2984 
/* Compute user-requested entries of inv(A) using MUMPS's selected-inversion
   feature (ICNTL(30)). spRHS must be a MATTRANSPOSEVIRTUAL wrapper around an
   AIJ matrix whose sparsity pattern selects the wanted entries; on return the
   wrapped matrix's values hold the corresponding entries of inv(A). */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatTransposeGetMat(spRHS, &Bt)); /* Bt is the AIJ matrix being virtually transposed */
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");

  PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30)=1: compute selected entries of the inverse */

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq         = b->A; /* local diagonal block of the MPIAIJ layout; assumes the requested pattern lives there -- the host passes it to MUMPS below */
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL; /* the dense RHS pointer is unused in sparse-RHS mode */

  if (!mumps->myid) { /* only the host provides the sparse RHS structure to MUMPS */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    /* shift of 1: MUMPS expects Fortran-style 1-based row/column indices */
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    mumps->id.rhs_sparse = (MumpsScalar *)aa; /* MUMPS overwrites these values with the inverse entries */
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) { /* release the host's hold on the AIJ data acquired above */
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3041 
3042 /*@
3043   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3044 
3045   Logically Collective
3046 
3047   Input Parameter:
3048 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3049 
3050   Output Parameter:
3051 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3052 
3053   Level: beginner
3054 
3055 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3056 @*/
3057 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3058 {
3059   PetscFunctionBegin;
3060   PetscValidType(F, 1);
3061   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3062   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3063   PetscFunctionReturn(PETSC_SUCCESS);
3064 }
3065 
3066 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3067 {
3068   Mat spRHS;
3069 
3070   PetscFunctionBegin;
3071   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3072   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3073   PetscCall(MatDestroy(&spRHS));
3074   PetscFunctionReturn(PETSC_SUCCESS);
3075 }
3076 
3077 /*@
3078   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>
3079 
3080   Logically Collective
3081 
3082   Input Parameter:
3083 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3084 
3085   Output Parameter:
3086 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3087 
3088   Level: beginner
3089 
3090 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3091 @*/
3092 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3093 {
3094   PetscBool flg;
3095 
3096   PetscFunctionBegin;
3097   PetscValidType(F, 1);
3098   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3099   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3100   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3101 
3102   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3103   PetscFunctionReturn(PETSC_SUCCESS);
3104 }
3105 
3106 /*@
3107   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3108 
3109   Logically Collective
3110 
3111   Input Parameters:
3112 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3113 - icntl - index of MUMPS parameter array INFO()
3114 
3115   Output Parameter:
3116 . ival - value of MUMPS INFO(icntl)
3117 
3118   Level: beginner
3119 
3120 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3121 @*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* dispatch to the MUMPS-specific implementation composed on the factor matrix */
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3131 
3132 /*@
3133   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3134 
3135   Logically Collective
3136 
3137   Input Parameters:
3138 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3139 - icntl - index of MUMPS parameter array INFOG()
3140 
3141   Output Parameter:
3142 . ival - value of MUMPS INFOG(icntl)
3143 
3144   Level: beginner
3145 
3146 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3147 @*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* dispatch to the MUMPS-specific implementation composed on the factor matrix */
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3157 
3158 /*@
3159   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3160 
3161   Logically Collective
3162 
3163   Input Parameters:
3164 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3165 - icntl - index of MUMPS parameter array RINFO()
3166 
3167   Output Parameter:
3168 . val - value of MUMPS RINFO(icntl)
3169 
3170   Level: beginner
3171 
3172 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3173 @*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* dispatch to the MUMPS-specific implementation composed on the factor matrix */
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3183 
3184 /*@
3185   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3186 
3187   Logically Collective
3188 
3189   Input Parameters:
3190 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3191 - icntl - index of MUMPS parameter array RINFOG()
3192 
3193   Output Parameter:
3194 . val - value of MUMPS RINFOG(icntl)
3195 
3196   Level: beginner
3197 
3198 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3199 @*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* dispatch to the MUMPS-specific implementation composed on the factor matrix */
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3209 
3210 /*@
3211   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3212 
3213   Logically Collective
3214 
3215   Input Parameter:
3216 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3217 
3218   Output Parameters:
3219 + size  - local size of the array. The size of the array is non-zero only on the host.
3220 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3221            for freeing this array.
3222 
3223   Level: beginner
3224 
3225 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3226 @*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  /* dispatch to the MUMPS-specific implementation composed on the factor matrix;
     it allocates *array, which the caller is responsible for freeing */
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3237 
3238 /*MC
3239   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3240   distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3241 
3242   Works with `MATAIJ` and `MATSBAIJ` matrices
3243 
3244   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3245 
3246   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3247   See details below.
3248 
3249   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3250 
3251   Options Database Keys:
3252 +  -mat_mumps_icntl_1   - ICNTL(1): output stream for error messages
3253 .  -mat_mumps_icntl_2   - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3254 .  -mat_mumps_icntl_3   -  ICNTL(3): output stream for global information, collected on the host
3255 .  -mat_mumps_icntl_4   -  ICNTL(4): level of printing (0 to 4)
3256 .  -mat_mumps_icntl_6   - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3257 .  -mat_mumps_icntl_7   - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3258                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3259 .  -mat_mumps_icntl_8   - ICNTL(8): scaling strategy (-2 to 8 or 77)
3260 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3261 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3262 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3263 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3264 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3265 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3266 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3267 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3268 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
3270 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3271 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3272 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
3274 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3275 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3276 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3277 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3278 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3279 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3280 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3281 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3282 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3283 .  -mat_mumps_cntl_2   -  CNTL(2): stopping criterion of refinement
3284 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3285 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3286 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3287 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3288 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3289                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3290 
3291   Level: beginner
3292 
3293   Notes:
3294   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3295   error if the matrix is Hermitian.
3296 
3297   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3298   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3299 
3300   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3301   the failure with
3302 .vb
3303           KSPGetPC(ksp,&pc);
3304           PCFactorGetMatrix(pc,&mat);
3305           MatMumpsGetInfo(mat,....);
3306           MatMumpsGetInfog(mat,....); etc.
3307 .ve
3308     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3309 
3310   MUMPS provides 64-bit integer support in two build modes:
3311   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3312   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3313 
3314   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3315   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3316   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3317   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3318 
  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3320 
3321   Two modes to run MUMPS/PETSc with OpenMP
3322 .vb
3323      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3324      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3325 .ve
3326 
3327 .vb
3328      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3329     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3330 .ve
3331 
3332    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3333    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3334    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3335    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3337 
3338    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3339    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3340    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3341    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3342    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3343    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3344    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3345    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3346    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3347    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3348    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3349    examine the mapping result.
3350 
3351    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3352    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3353    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3354 
3355    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3356 
3357 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3358 M*/
3359 
/* Callback composed on MUMPS factor matrices: reports the solver package name */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3366 
3367 /* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;     /* the factor matrix being built */
  Mat_MUMPS  *mumps; /* solver-specific context stored in B->data */
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* complex Hermitian (non-symmetric) Cholesky is not supported: signal this by
     returning a NULL factor rather than raising an error */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MUMPS-specific methods so the MatMumpsXxx() public API works on this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));

  /* select the symbolic factorization routine and the matrix-to-triples conversion
     matching the requested factor type and the input matrix format */
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex builds always use SYM=2 (general symmetric) */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; /* SYM=1 when A is marked SPD */
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  /* initialize the MUMPS context; JOB_NULL means no MUMPS job has been run yet */
  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3455 
3456 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3457 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
3458 {
3459   Mat         B;
3460   Mat_MUMPS  *mumps;
3461   PetscBool   isSeqSBAIJ;
3462   PetscMPIInt size;
3463 
3464   PetscFunctionBegin;
3465 #if defined(PETSC_USE_COMPLEX)
3466   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3467     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3468     *F = NULL;
3469     PetscFunctionReturn(PETSC_SUCCESS);
3470   }
3471 #endif
3472   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3473   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3474   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3475   PetscCall(MatSetUp(B));
3476 
3477   PetscCall(PetscNew(&mumps));
3478   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3479   if (isSeqSBAIJ) {
3480     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3481   } else {
3482     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3483   }
3484 
3485   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3486   B->ops->view                   = MatView_MUMPS;
3487   B->ops->getinfo                = MatGetInfo_MUMPS;
3488 
3489   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3490   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3491   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3492   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3493   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3494   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3495   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3496   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3497   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3498   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3499   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3500   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3501   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3502   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3503 
3504   B->factortype = MAT_FACTOR_CHOLESKY;
3505 #if defined(PETSC_USE_COMPLEX)
3506   mumps->sym = 2;
3507 #else
3508   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3509   else mumps->sym = 2;
3510 #endif
3511 
3512   /* set solvertype */
3513   PetscCall(PetscFree(B->solvertype));
3514   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3515   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3516   if (size == 1) {
3517     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3518     B->canuseordering = PETSC_TRUE;
3519   }
3520   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3521   B->ops->destroy = MatDestroy_MUMPS;
3522   B->data         = (void *)mumps;
3523 
3524   *F               = B;
3525   mumps->id.job    = JOB_NULL;
3526   mumps->ICNTL_pre = NULL;
3527   mumps->CNTL_pre  = NULL;
3528   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3529   PetscFunctionReturn(PETSC_SUCCESS);
3530 }
3531 
3532 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3533 {
3534   Mat         B;
3535   Mat_MUMPS  *mumps;
3536   PetscBool   isSeqBAIJ;
3537   PetscMPIInt size;
3538 
3539   PetscFunctionBegin;
3540   /* Create the factorization matrix */
3541   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3542   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3543   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3544   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3545   PetscCall(MatSetUp(B));
3546 
3547   PetscCall(PetscNew(&mumps));
3548   if (ftype == MAT_FACTOR_LU) {
3549     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3550     B->factortype            = MAT_FACTOR_LU;
3551     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3552     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3553     mumps->sym = 0;
3554     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3555   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3556 
3557   B->ops->view    = MatView_MUMPS;
3558   B->ops->getinfo = MatGetInfo_MUMPS;
3559 
3560   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3561   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3562   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3563   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3564   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3565   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3566   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3574 
3575   /* set solvertype */
3576   PetscCall(PetscFree(B->solvertype));
3577   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3578   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3579   if (size == 1) {
3580     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3581     B->canuseordering = PETSC_TRUE;
3582   }
3583   B->ops->destroy = MatDestroy_MUMPS;
3584   B->data         = (void *)mumps;
3585 
3586   *F               = B;
3587   mumps->id.job    = JOB_NULL;
3588   mumps->ICNTL_pre = NULL;
3589   mumps->CNTL_pre  = NULL;
3590   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3591   PetscFunctionReturn(PETSC_SUCCESS);
3592 }
3593 
3594 /* MatGetFactor for Seq and MPI SELL matrices */
3595 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3596 {
3597   Mat         B;
3598   Mat_MUMPS  *mumps;
3599   PetscBool   isSeqSELL;
3600   PetscMPIInt size;
3601 
3602   PetscFunctionBegin;
3603   /* Create the factorization matrix */
3604   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3605   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3606   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3607   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3608   PetscCall(MatSetUp(B));
3609 
3610   PetscCall(PetscNew(&mumps));
3611 
3612   B->ops->view    = MatView_MUMPS;
3613   B->ops->getinfo = MatGetInfo_MUMPS;
3614 
3615   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3616   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3617   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3618   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3619   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3620   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3621   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3622   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3623   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3624   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3625   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3626   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3627 
3628   if (ftype == MAT_FACTOR_LU) {
3629     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3630     B->factortype            = MAT_FACTOR_LU;
3631     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3632     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3633     mumps->sym = 0;
3634     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3635   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3636 
3637   /* set solvertype */
3638   PetscCall(PetscFree(B->solvertype));
3639   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3640   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3641   if (size == 1) {
3642     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3643     B->canuseordering = PETSC_TRUE;
3644   }
3645   B->ops->destroy = MatDestroy_MUMPS;
3646   B->data         = (void *)mumps;
3647 
3648   *F               = B;
3649   mumps->id.job    = JOB_NULL;
3650   mumps->ICNTL_pre = NULL;
3651   mumps->CNTL_pre  = NULL;
3652   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3653   PetscFunctionReturn(PETSC_SUCCESS);
3654 }
3655 
3656 /* MatGetFactor for MATNEST matrices */
3657 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3658 {
3659   Mat         B, **mats;
3660   Mat_MUMPS  *mumps;
3661   PetscInt    nr, nc;
3662   PetscMPIInt size;
3663   PetscBool   flg = PETSC_TRUE;
3664 
3665   PetscFunctionBegin;
3666 #if defined(PETSC_USE_COMPLEX)
3667   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3668     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3669     *F = NULL;
3670     PetscFunctionReturn(PETSC_SUCCESS);
3671   }
3672 #endif
3673 
3674   /* Return if some condition is not satisfied */
3675   *F = NULL;
3676   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3677   if (ftype == MAT_FACTOR_CHOLESKY) {
3678     IS       *rows, *cols;
3679     PetscInt *m, *M;
3680 
3681     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3682     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3683     PetscCall(MatNestGetISs(A, rows, cols));
3684     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3685     if (!flg) {
3686       PetscCall(PetscFree2(rows, cols));
3687       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3688       PetscFunctionReturn(PETSC_SUCCESS);
3689     }
3690     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3691     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3692     for (PetscInt r = 0; flg && r < nr; r++)
3693       for (PetscInt k = r + 1; flg && k < nr; k++)
3694         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3695     PetscCall(PetscFree2(m, M));
3696     PetscCall(PetscFree2(rows, cols));
3697     if (!flg) {
3698       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3699       PetscFunctionReturn(PETSC_SUCCESS);
3700     }
3701   }
3702 
3703   for (PetscInt r = 0; r < nr; r++) {
3704     for (PetscInt c = 0; c < nc; c++) {
3705       Mat       sub = mats[r][c];
3706       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag, isDense;
3707 
3708       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3709       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3710       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3711       else {
3712         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3713         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3714       }
3715       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3716       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3717       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3718       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3719       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3720       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3721       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3722       PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
3723       if (ftype == MAT_FACTOR_CHOLESKY) {
3724         if (r == c) {
3725           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
3726             PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3727             flg = PETSC_FALSE;
3728           }
3729         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3730           PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3731           flg = PETSC_FALSE;
3732         }
3733       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
3734         PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3735         flg = PETSC_FALSE;
3736       }
3737     }
3738   }
3739   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3740 
3741   /* Create the factorization matrix */
3742   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3743   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3744   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3745   PetscCall(MatSetUp(B));
3746 
3747   PetscCall(PetscNew(&mumps));
3748 
3749   B->ops->view    = MatView_MUMPS;
3750   B->ops->getinfo = MatGetInfo_MUMPS;
3751 
3752   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3753   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3754   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3755   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3756   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3757   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3758   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3759   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3760   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3761   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3762   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3763   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3764   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3765   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3766 
3767   if (ftype == MAT_FACTOR_LU) {
3768     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3769     B->factortype            = MAT_FACTOR_LU;
3770     mumps->sym               = 0;
3771   } else {
3772     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3773     B->factortype                  = MAT_FACTOR_CHOLESKY;
3774 #if defined(PETSC_USE_COMPLEX)
3775     mumps->sym = 2;
3776 #else
3777     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3778     else mumps->sym = 2;
3779 #endif
3780   }
3781   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3782   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3783 
3784   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3785   if (size == 1) {
3786     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3787     B->canuseordering = PETSC_TRUE;
3788   }
3789 
3790   /* set solvertype */
3791   PetscCall(PetscFree(B->solvertype));
3792   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3793   B->ops->destroy = MatDestroy_MUMPS;
3794   B->data         = (void *)mumps;
3795 
3796   *F               = B;
3797   mumps->id.job    = JOB_NULL;
3798   mumps->ICNTL_pre = NULL;
3799   mumps->CNTL_pre  = NULL;
3800   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3801   PetscFunctionReturn(PETSC_SUCCESS);
3802 }
3803 
3804 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3805 {
3806   PetscFunctionBegin;
3807   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3808   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3809   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3810   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3811   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3812   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3813   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3814   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3815   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3816   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3817   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3818   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3819   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3820   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3821   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3822   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3823   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3824   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3825   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3826   PetscFunctionReturn(PETSC_SUCCESS);
3827 }
3828