xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision ed00a6c129cbc4f1b4754b50ab0c4e7536acfadd)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <petscsf.h>
7 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
8 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
9 #include <../src/mat/impls/sell/mpi/mpisell.h>
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13   #if defined(PETSC_USE_REAL_SINGLE)
14     #include <cmumps_c.h>
15   #else
16     #include <zmumps_c.h>
17   #endif
18 #else
19   #if defined(PETSC_USE_REAL_SINGLE)
20     #include <smumps_c.h>
21   #else
22     #include <dmumps_c.h>
23   #endif
24 #endif
25 EXTERN_C_END
26 #define JOB_INIT         -1
27 #define JOB_NULL         0
28 #define JOB_FACTSYMBOLIC 1
29 #define JOB_FACTNUMERIC  2
30 #define JOB_SOLVE        3
31 #define JOB_END          -2
32 
33 /* calls to MUMPS */
34 #if defined(PETSC_USE_COMPLEX)
35   #if defined(PETSC_USE_REAL_SINGLE)
36     #define MUMPS_c cmumps_c
37   #else
38     #define MUMPS_c zmumps_c
39   #endif
40 #else
41   #if defined(PETSC_USE_REAL_SINGLE)
42     #define MUMPS_c smumps_c
43   #else
44     #define MUMPS_c dmumps_c
45   #endif
46 #endif
47 
48 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
49    numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
50    naming convention of PetscMPIInt, PetscBLASInt, etc.
51 */
52 typedef MUMPS_INT PetscMUMPSInt;
53 
54 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
55   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
56     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
57   #endif
58 #else
59   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
60     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
61   #endif
62 #endif
63 
64 #define MPIU_MUMPSINT       MPI_INT
65 #define PETSC_MUMPS_INT_MAX 2147483647
66 #define PETSC_MUMPS_INT_MIN -2147483648
67 
68 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
69 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
70 {
71   PetscFunctionBegin;
72 #if PetscDefined(USE_64BIT_INDICES)
73   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
74 #endif
75   *b = (PetscMUMPSInt)(a);
76   PetscFunctionReturn(PETSC_SUCCESS);
77 }
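/* A minimal usage sketch (illustrative only, not part of this file's logic): converting a 0-based
   PetscInt index into a 1-based PetscMUMPSInt index, as done throughout the ConvertToTriples routines below.

     PetscInt      i = 41;                        // hypothetical 0-based row index
     PetscMUMPSInt row;
     PetscCall(PetscMUMPSIntCast(i + 1, &row));   // row == 42; errors out in 64-bit-index builds if the value does not fit
*/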
78 
79 /* Put these utility routines here since they are only used in this file */
80 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
81 {
82   PetscInt  myval;
83   PetscBool myset;
84   PetscFunctionBegin;
85   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
86   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
87   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
88   if (set) *set = myset;
89   PetscFunctionReturn(PETSC_SUCCESS);
90 }
91 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
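/* Usage sketch (hypothetical option handling, for illustration; assumes it is placed inside a
   PetscOptionsBegin()/PetscOptionsEnd() block so that PetscOptionsObject is in scope, as the macro above requires):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &icntl, &flg));
     if (flg) mumps->id.ICNTL(14) = icntl;
*/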
92 
93 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
94 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
95   #define PetscMUMPS_c(mumps) \
96     do { \
97       if (mumps->use_petsc_omp_support) { \
98         if (mumps->is_omp_master) { \
99           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
100           MUMPS_c(&mumps->id); \
101           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
102         } \
103         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
104         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is          \
105          process-specific, so we only Bcast info[1], an error code, and leave the others alone (they have   \
106          no easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.           \
107          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.  \
108       */ \
109         PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
110         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
111         PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
112       } else { \
113         MUMPS_c(&mumps->id); \
114       } \
115     } while (0)
116 #else
117   #define PetscMUMPS_c(mumps) \
118     do { \
119       MUMPS_c(&mumps->id); \
120     } while (0)
121 #endif
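/* Typical call pattern (sketch, mirroring the factorization/solve code later in this file): set the job,
   invoke MUMPS through the wrapper, then check the MUMPS status code INFOG(1).

     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
*/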
122 
123 /* declare MumpsScalar */
124 #if defined(PETSC_USE_COMPLEX)
125   #if defined(PETSC_USE_REAL_SINGLE)
126     #define MumpsScalar mumps_complex
127   #else
128     #define MumpsScalar mumps_double_complex
129   #endif
130 #else
131   #define MumpsScalar PetscScalar
132 #endif
133 
134 /* macros so that indices match the MUMPS documentation */
135 #define ICNTL(I)  icntl[(I)-1]
136 #define CNTL(I)   cntl[(I)-1]
137 #define INFOG(I)  infog[(I)-1]
138 #define INFO(I)   info[(I)-1]
139 #define RINFOG(I) rinfog[(I)-1]
140 #define RINFO(I)  rinfo[(I)-1]
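/* Example: with the macros above, mumps->id.ICNTL(24) expands to mumps->id.icntl[23], so control parameters
   can be written with the 1-based numbering of the MUMPS users' guide, e.g. (illustrative)

     mumps->id.ICNTL(24) = 1;   // ICNTL(24): detection of null pivot rows, per the MUMPS manual
*/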
141 
142 typedef struct Mat_MUMPS Mat_MUMPS;
143 struct Mat_MUMPS {
144 #if defined(PETSC_USE_COMPLEX)
145   #if defined(PETSC_USE_REAL_SINGLE)
146   CMUMPS_STRUC_C id;
147   #else
148   ZMUMPS_STRUC_C id;
149   #endif
150 #else
151   #if defined(PETSC_USE_REAL_SINGLE)
152   SMUMPS_STRUC_C id;
153   #else
154   DMUMPS_STRUC_C id;
155   #endif
156 #endif
157 
158   MatStructure   matstruc;
159   PetscMPIInt    myid, petsc_size;
160   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
161   PetscScalar   *val, *val_alloc; /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
162   PetscInt64     nnz;             /* number of nonzeros. MUMPS calls this type selective 64-bit */
163   PetscMUMPSInt  sym;
164   MPI_Comm       mumps_comm;
165   PetscMUMPSInt *ICNTL_pre;
166   PetscReal     *CNTL_pre;
167   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
168   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
169   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
170   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
171 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
172   PetscInt    *rhs_nrow, max_nrhs;
173   PetscMPIInt *rhs_recvcounts, *rhs_disps;
174   PetscScalar *rhs_loc, *rhs_recvbuf;
175 #endif
176   Vec            b_seq, x_seq;
177   PetscInt       ninfo, *info; /* which INFO to display */
178   PetscInt       sizeredrhs;
179   PetscScalar   *schur_sol;
180   PetscInt       schur_sizesol;
181   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
182   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
183   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
184 
185   /* Support for MATNEST */
186   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
187   PetscInt64  *nest_vals_start;
188   PetscScalar *nest_vals;
189 
190   /* stuff used by petsc/mumps OpenMP support*/
191   PetscBool    use_petsc_omp_support;
192   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
193   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
194   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
195   PetscMPIInt  tag, omp_comm_size;
196   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
197   MPI_Request *reqs;
198 };
199 
200 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
201    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
202  */
203 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
204 {
205   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the RHS */
206 
207   PetscFunctionBegin;
208 #if defined(PETSC_USE_64BIT_INDICES)
209   {
210     PetscInt i;
211     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
212       PetscCall(PetscFree(mumps->ia_alloc));
213       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
214       mumps->cur_ilen = nrow + 1;
215     }
216     if (nnz > mumps->cur_jlen) {
217       PetscCall(PetscFree(mumps->ja_alloc));
218       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
219       mumps->cur_jlen = nnz;
220     }
221     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
222     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
223     *ia_mumps = mumps->ia_alloc;
224     *ja_mumps = mumps->ja_alloc;
225   }
226 #else
227   *ia_mumps          = ia;
228   *ja_mumps          = ja;
229 #endif
230   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
231   PetscFunctionReturn(PETSC_SUCCESS);
232 }
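/* Usage sketch (illustrative values): casting a small 1-based CSR so it can be handed to MUMPS.

     PetscInt       nrow = 2;
     PetscInt       ia[] = {1, 2, 4};   // 1-based row pointers
     PetscInt       ja[] = {1, 1, 2};   // 1-based column indices
     PetscMUMPSInt *ia_m, *ja_m, nnz_m;
     PetscCall(PetscMUMPSIntCSRCast(mumps, nrow, ia, ja, &ia_m, &ja_m, &nnz_m));   // nnz_m == 3
*/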
233 
234 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
235 {
236   PetscFunctionBegin;
237   PetscCall(PetscFree(mumps->id.listvar_schur));
238   PetscCall(PetscFree(mumps->id.redrhs));
239   PetscCall(PetscFree(mumps->schur_sol));
240   mumps->id.size_schur = 0;
241   mumps->id.schur_lld  = 0;
242   mumps->id.ICNTL(19)  = 0;
243   PetscFunctionReturn(PETSC_SUCCESS);
244 }
245 
246 /* solve with the rhs in mumps->id.redrhs and return the solution in the same location */
247 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
248 {
249   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
250   Mat                  S, B, X;
251   MatFactorSchurStatus schurstatus;
252   PetscInt             sizesol;
253 
254   PetscFunctionBegin;
255   PetscCall(MatFactorFactorizeSchurComplement(F));
256   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
257   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
258   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
259 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
260   PetscCall(MatBindToCPU(B, S->boundtocpu));
261 #endif
262   switch (schurstatus) {
263   case MAT_FACTOR_SCHUR_FACTORED:
264     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
265     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
266 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
267     PetscCall(MatBindToCPU(X, S->boundtocpu));
268 #endif
269     if (!mumps->id.ICNTL(9)) { /* transpose solve */
270       PetscCall(MatMatSolveTranspose(S, B, X));
271     } else {
272       PetscCall(MatMatSolve(S, B, X));
273     }
274     break;
275   case MAT_FACTOR_SCHUR_INVERTED:
276     sizesol = mumps->id.nrhs * mumps->id.size_schur;
277     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
278       PetscCall(PetscFree(mumps->schur_sol));
279       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
280       mumps->schur_sizesol = sizesol;
281     }
282     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
283     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
284 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
285     PetscCall(MatBindToCPU(X, S->boundtocpu));
286 #endif
287     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
288     if (!mumps->id.ICNTL(9)) { /* transpose solve */
289       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
290     } else {
291       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
292     }
293     PetscCall(MatProductSetFromOptions(X));
294     PetscCall(MatProductSymbolic(X));
295     PetscCall(MatProductNumeric(X));
296 
297     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
298     break;
299   default:
300     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
301   }
302   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
303   PetscCall(MatDestroy(&B));
304   PetscCall(MatDestroy(&X));
305   PetscFunctionReturn(PETSC_SUCCESS);
306 }
307 
308 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
309 {
310   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
311 
312   PetscFunctionBegin;
313   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
314     PetscFunctionReturn(PETSC_SUCCESS);
315   }
316   if (!expansion) { /* prepare for the condensation step */
317     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
318     /* allocate MUMPS internal array to store reduced right-hand sides */
319     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
320       PetscCall(PetscFree(mumps->id.redrhs));
321       mumps->id.lredrhs = mumps->id.size_schur;
322       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
323       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
324     }
325     mumps->id.ICNTL(26) = 1; /* condensation phase */
326   } else {                   /* prepare for the expansion step */
327     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
328     PetscCall(MatMumpsSolveSchur_Private(F));
329     mumps->id.ICNTL(26) = 2; /* expansion phase */
330     PetscMUMPS_c(mumps);
331     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
332     /* restore defaults */
333     mumps->id.ICNTL(26) = -1;
334     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
335     if (mumps->id.nrhs > 1) {
336       PetscCall(PetscFree(mumps->id.redrhs));
337       mumps->id.lredrhs = 0;
338       mumps->sizeredrhs = 0;
339     }
340   }
341   PetscFunctionReturn(PETSC_SUCCESS);
342 }
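/* Sketch of how the helper above is driven from MatSolve_MUMPS() further below, when a Schur complement has
   been requested but PETSc solves the full system on behalf of the user:

     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_FALSE));   // ICNTL(26)=1: the next solve condenses the RHS into id.redrhs
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);                                      // produces the reduced right-hand side
     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_TRUE));    // solves with the Schur complement, then runs the ICNTL(26)=2 expansion solve
*/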
343 
344 /*
345   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
346 
347   input:
348     A       - matrix in aij, baij or sbaij format
349     shift   - 0: C-style output triples; 1: Fortran-style output triples.
350     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
351               MAT_REUSE_MATRIX:   only the values in v array are updated
352   output:
353     nnz     - dim of r, c, and v (number of local nonzero entries of A)
354     r, c, v - row indices, column indices, and matrix values (the matrix triples)
355 
356   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() they are
357    freed with PetscFree(mumps->irn). This is not ideal: the fact that v is ONLY sometimes part of mumps->irn means
358    that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
359 
360  */
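/*
   Example (illustrative): for the 2x2 sequential AIJ matrix

       [ 4  1 ]
       [ 0  3 ]

   with shift = 1 (Fortran-style indices, as expected by MUMPS), the routines below produce

       r = {1, 1, 2},  c = {1, 2, 2},  v = {4, 1, 3},  nnz = 3
*/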
361 
362 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
363 {
364   const PetscScalar *av;
365   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
366   PetscInt64         nz, rnz, i, j, k;
367   PetscMUMPSInt     *row, *col;
368   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
369 
370   PetscFunctionBegin;
371   PetscCall(MatSeqAIJGetArrayRead(A, &av));
372   mumps->val = (PetscScalar *)av;
373   if (reuse == MAT_INITIAL_MATRIX) {
374     nz = aa->nz;
375     ai = aa->i;
376     aj = aa->j;
377     PetscCall(PetscMalloc2(nz, &row, nz, &col));
378     for (i = k = 0; i < M; i++) {
379       rnz = ai[i + 1] - ai[i];
380       ajj = aj + ai[i];
381       for (j = 0; j < rnz; j++) {
382         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
383         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
384         k++;
385       }
386     }
387     mumps->irn = row;
388     mumps->jcn = col;
389     mumps->nnz = nz;
390   }
391   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
392   PetscFunctionReturn(PETSC_SUCCESS);
393 }
394 
395 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
396 {
397   PetscInt64     nz, i, j, k, r;
398   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
399   PetscMUMPSInt *row, *col;
400 
401   PetscFunctionBegin;
402   mumps->val = a->val;
403   if (reuse == MAT_INITIAL_MATRIX) {
404     nz = a->sliidx[a->totalslices];
405     PetscCall(PetscMalloc2(nz, &row, nz, &col));
406     for (i = k = 0; i < a->totalslices; i++) {
407       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
408     }
409     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
410     mumps->irn = row;
411     mumps->jcn = col;
412     mumps->nnz = nz;
413   }
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
418 {
419   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
420   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
421   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
422   PetscInt        bs;
423   PetscMUMPSInt  *row, *col;
424 
425   PetscFunctionBegin;
426   PetscCall(MatGetBlockSize(A, &bs));
427   M          = A->rmap->N / bs;
428   mumps->val = aa->a;
429   if (reuse == MAT_INITIAL_MATRIX) {
430     ai = aa->i;
431     aj = aa->j;
432     nz = bs2 * aa->nz;
433     PetscCall(PetscMalloc2(nz, &row, nz, &col));
434     for (i = 0; i < M; i++) {
435       ajj = aj + ai[i];
436       rnz = ai[i + 1] - ai[i];
437       for (k = 0; k < rnz; k++) {
438         for (j = 0; j < bs; j++) {
439           for (m = 0; m < bs; m++) {
440             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
441             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
442             idx++;
443           }
444         }
445       }
446     }
447     mumps->irn = row;
448     mumps->jcn = col;
449     mumps->nnz = nz;
450   }
451   PetscFunctionReturn(PETSC_SUCCESS);
452 }
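/* Example (illustrative): for SeqBAIJ with bs = 2, each stored block contributes bs2 = 4 triples. A block in
   block-row i and block-column ajj[k] expands, with shift = 1, to the (row, col) pairs

       (2*i+1, 2*ajj[k]+1), (2*i+2, 2*ajj[k]+1), (2*i+1, 2*ajj[k]+2), (2*i+2, 2*ajj[k]+2)

   in that order, matching the column-major layout of each block inside aa->a (so mumps->val can alias aa->a). */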
453 
454 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
455 {
456   const PetscInt *ai, *aj, *ajj;
457   PetscInt        bs;
458   PetscInt64      nz, rnz, i, j, k, m;
459   PetscMUMPSInt  *row, *col;
460   PetscScalar    *val;
461   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
462   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
463 #if defined(PETSC_USE_COMPLEX)
464   PetscBool isset, hermitian;
465 #endif
466 
467   PetscFunctionBegin;
468 #if defined(PETSC_USE_COMPLEX)
469   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
470   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
471 #endif
472   ai = aa->i;
473   aj = aa->j;
474   PetscCall(MatGetBlockSize(A, &bs));
475   if (reuse == MAT_INITIAL_MATRIX) {
476     const PetscInt64 alloc_size = aa->nz * bs2;
477 
478     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
479     if (bs > 1) {
480       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
481       mumps->val = mumps->val_alloc;
482     } else {
483       mumps->val = aa->a;
484     }
485     mumps->irn = row;
486     mumps->jcn = col;
487   } else {
488     if (bs == 1) mumps->val = aa->a;
489     row = mumps->irn;
490     col = mumps->jcn;
491   }
492   val = mumps->val;
493 
494   nz = 0;
495   if (bs > 1) {
496     for (i = 0; i < mbs; i++) {
497       rnz = ai[i + 1] - ai[i];
498       ajj = aj + ai[i];
499       for (j = 0; j < rnz; j++) {
500         for (k = 0; k < bs; k++) {
501           for (m = 0; m < bs; m++) {
502             if (ajj[j] > i || k >= m) {
503               if (reuse == MAT_INITIAL_MATRIX) {
504                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
505                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
506               }
507               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
508             }
509           }
510         }
511       }
512     }
513   } else if (reuse == MAT_INITIAL_MATRIX) {
514     for (i = 0; i < mbs; i++) {
515       rnz = ai[i + 1] - ai[i];
516       ajj = aj + ai[i];
517       for (j = 0; j < rnz; j++) {
518         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
519         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
520         nz++;
521       }
522     }
523     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
524   }
525   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
530 {
531   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
532   PetscInt64         nz, rnz, i, j;
533   const PetscScalar *av, *v1;
534   PetscScalar       *val;
535   PetscMUMPSInt     *row, *col;
536   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
537   PetscBool          missing;
538 #if defined(PETSC_USE_COMPLEX)
539   PetscBool hermitian, isset;
540 #endif
541 
542   PetscFunctionBegin;
543 #if defined(PETSC_USE_COMPLEX)
544   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
545   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
546 #endif
547   PetscCall(MatSeqAIJGetArrayRead(A, &av));
548   ai    = aa->i;
549   aj    = aa->j;
550   adiag = aa->diag;
551   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
552   if (reuse == MAT_INITIAL_MATRIX) {
553     /* count nz in the upper triangular part of A */
554     nz = 0;
555     if (missing) {
556       for (i = 0; i < M; i++) {
557         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
558           for (j = ai[i]; j < ai[i + 1]; j++) {
559             if (aj[j] < i) continue;
560             nz++;
561           }
562         } else {
563           nz += ai[i + 1] - adiag[i];
564         }
565       }
566     } else {
567       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
568     }
569     PetscCall(PetscMalloc2(nz, &row, nz, &col));
570     PetscCall(PetscMalloc1(nz, &val));
571     mumps->nnz = nz;
572     mumps->irn = row;
573     mumps->jcn = col;
574     mumps->val = mumps->val_alloc = val;
575 
576     nz = 0;
577     if (missing) {
578       for (i = 0; i < M; i++) {
579         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
580           for (j = ai[i]; j < ai[i + 1]; j++) {
581             if (aj[j] < i) continue;
582             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
583             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
584             val[nz] = av[j];
585             nz++;
586           }
587         } else {
588           rnz = ai[i + 1] - adiag[i];
589           ajj = aj + adiag[i];
590           v1  = av + adiag[i];
591           for (j = 0; j < rnz; j++) {
592             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
593             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
594             val[nz++] = v1[j];
595           }
596         }
597       }
598     } else {
599       for (i = 0; i < M; i++) {
600         rnz = ai[i + 1] - adiag[i];
601         ajj = aj + adiag[i];
602         v1  = av + adiag[i];
603         for (j = 0; j < rnz; j++) {
604           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
605           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
606           val[nz++] = v1[j];
607         }
608       }
609     }
610   } else {
611     nz  = 0;
612     val = mumps->val;
613     if (missing) {
614       for (i = 0; i < M; i++) {
615         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
616           for (j = ai[i]; j < ai[i + 1]; j++) {
617             if (aj[j] < i) continue;
618             val[nz++] = av[j];
619           }
620         } else {
621           rnz = ai[i + 1] - adiag[i];
622           v1  = av + adiag[i];
623           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
624         }
625       }
626     } else {
627       for (i = 0; i < M; i++) {
628         rnz = ai[i + 1] - adiag[i];
629         v1  = av + adiag[i];
630         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
631       }
632     }
633   }
634   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
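/* Example (illustrative): for the symmetric 2x2 sequential AIJ matrix

       [ 4  1 ]
       [ 1  3 ]

   only the upper triangle is emitted, so with shift = 1 the triples are

       r = {1, 1, 2},  c = {1, 2, 2},  v = {4, 1, 3},  nnz = 3
*/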
637 
638 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
639 {
640   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
641   PetscInt           bs;
642   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
643   PetscMUMPSInt     *row, *col;
644   const PetscScalar *av, *bv, *v1, *v2;
645   PetscScalar       *val;
646   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
647   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
648   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
649   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
650 #if defined(PETSC_USE_COMPLEX)
651   PetscBool hermitian, isset;
652 #endif
653 
654   PetscFunctionBegin;
655 #if defined(PETSC_USE_COMPLEX)
656   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
657   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
658 #endif
659   PetscCall(MatGetBlockSize(A, &bs));
660   rstart = A->rmap->rstart;
661   ai     = aa->i;
662   aj     = aa->j;
663   bi     = bb->i;
664   bj     = bb->j;
665   av     = aa->a;
666   bv     = bb->a;
667 
668   garray = mat->garray;
669 
670   if (reuse == MAT_INITIAL_MATRIX) {
671     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
672     PetscCall(PetscMalloc2(nz, &row, nz, &col));
673     PetscCall(PetscMalloc1(nz, &val));
674     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
675     mumps->irn = row;
676     mumps->jcn = col;
677     mumps->val = mumps->val_alloc = val;
678   } else {
679     val = mumps->val;
680   }
681 
682   jj   = 0;
683   irow = rstart;
684   for (i = 0; i < mbs; i++) {
685     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
686     countA = ai[i + 1] - ai[i];
687     countB = bi[i + 1] - bi[i];
688     bjj    = bj + bi[i];
689     v1     = av + ai[i] * bs2;
690     v2     = bv + bi[i] * bs2;
691 
692     if (bs > 1) {
693       /* A-part */
694       for (j = 0; j < countA; j++) {
695         for (k = 0; k < bs; k++) {
696           for (m = 0; m < bs; m++) {
697             if (rstart + ajj[j] * bs > irow || k >= m) {
698               if (reuse == MAT_INITIAL_MATRIX) {
699                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
700                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
701               }
702               val[jj++] = v1[j * bs2 + m + k * bs];
703             }
704           }
705         }
706       }
707 
708       /* B-part */
709       for (j = 0; j < countB; j++) {
710         for (k = 0; k < bs; k++) {
711           for (m = 0; m < bs; m++) {
712             if (reuse == MAT_INITIAL_MATRIX) {
713               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
714               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
715             }
716             val[jj++] = v2[j * bs2 + m + k * bs];
717           }
718         }
719       }
720     } else {
721       /* A-part */
722       for (j = 0; j < countA; j++) {
723         if (reuse == MAT_INITIAL_MATRIX) {
724           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
725           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
726         }
727         val[jj++] = v1[j];
728       }
729 
730       /* B-part */
731       for (j = 0; j < countB; j++) {
732         if (reuse == MAT_INITIAL_MATRIX) {
733           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
734           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
735         }
736         val[jj++] = v2[j];
737       }
738     }
739     irow += bs;
740   }
741   mumps->nnz = jj;
742   PetscFunctionReturn(PETSC_SUCCESS);
743 }
744 
745 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
746 {
747   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
748   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
749   PetscMUMPSInt     *row, *col;
750   const PetscScalar *av, *bv, *v1, *v2;
751   PetscScalar       *val;
752   Mat                Ad, Ao;
753   Mat_SeqAIJ        *aa;
754   Mat_SeqAIJ        *bb;
755 
756   PetscFunctionBegin;
757   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
758   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
759   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
760 
761   aa = (Mat_SeqAIJ *)(Ad)->data;
762   bb = (Mat_SeqAIJ *)(Ao)->data;
763   ai = aa->i;
764   aj = aa->j;
765   bi = bb->i;
766   bj = bb->j;
767 
768   rstart = A->rmap->rstart;
769 
770   if (reuse == MAT_INITIAL_MATRIX) {
771     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
772     PetscCall(PetscMalloc2(nz, &row, nz, &col));
773     PetscCall(PetscMalloc1(nz, &val));
774     mumps->nnz = nz;
775     mumps->irn = row;
776     mumps->jcn = col;
777     mumps->val = mumps->val_alloc = val;
778   } else {
779     val = mumps->val;
780   }
781 
782   jj   = 0;
783   irow = rstart;
784   for (i = 0; i < m; i++) {
785     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
786     countA = ai[i + 1] - ai[i];
787     countB = bi[i + 1] - bi[i];
788     bjj    = bj + bi[i];
789     v1     = av + ai[i];
790     v2     = bv + bi[i];
791 
792     /* A-part */
793     for (j = 0; j < countA; j++) {
794       if (reuse == MAT_INITIAL_MATRIX) {
795         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
796         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
797       }
798       val[jj++] = v1[j];
799     }
800 
801     /* B-part */
802     for (j = 0; j < countB; j++) {
803       if (reuse == MAT_INITIAL_MATRIX) {
804         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
805         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
806       }
807       val[jj++] = v2[j];
808     }
809     irow++;
810   }
811   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
812   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
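/* Example (illustrative): in an MPIAIJ matrix the off-diagonal block Ao stores compressed local column
   indices; garray[] maps them back to global columns. If on some rank garray = {0, 7} and a row of Ao has
   local columns {0, 1}, the triples above record global columns {0, 7} (plus shift). */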
815 
816 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
817 {
818   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
819   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
820   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
821   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
822   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
823   const PetscInt     bs2 = mat->bs2;
824   PetscInt           bs;
825   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
826   PetscMUMPSInt     *row, *col;
827   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
828   PetscScalar       *val;
829 
830   PetscFunctionBegin;
831   PetscCall(MatGetBlockSize(A, &bs));
832   if (reuse == MAT_INITIAL_MATRIX) {
833     nz = bs2 * (aa->nz + bb->nz);
834     PetscCall(PetscMalloc2(nz, &row, nz, &col));
835     PetscCall(PetscMalloc1(nz, &val));
836     mumps->nnz = nz;
837     mumps->irn = row;
838     mumps->jcn = col;
839     mumps->val = mumps->val_alloc = val;
840   } else {
841     val = mumps->val;
842   }
843 
844   jj   = 0;
845   irow = rstart;
846   for (i = 0; i < mbs; i++) {
847     countA = ai[i + 1] - ai[i];
848     countB = bi[i + 1] - bi[i];
849     ajj    = aj + ai[i];
850     bjj    = bj + bi[i];
851     v1     = av + bs2 * ai[i];
852     v2     = bv + bs2 * bi[i];
853 
854     idx = 0;
855     /* A-part */
856     for (k = 0; k < countA; k++) {
857       for (j = 0; j < bs; j++) {
858         for (n = 0; n < bs; n++) {
859           if (reuse == MAT_INITIAL_MATRIX) {
860             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
861             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
862           }
863           val[jj++] = v1[idx++];
864         }
865       }
866     }
867 
868     idx = 0;
869     /* B-part */
870     for (k = 0; k < countB; k++) {
871       for (j = 0; j < bs; j++) {
872         for (n = 0; n < bs; n++) {
873           if (reuse == MAT_INITIAL_MATRIX) {
874             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
875             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
876           }
877           val[jj++] = v2[idx++];
878         }
879       }
880     }
881     irow += bs;
882   }
883   PetscFunctionReturn(PETSC_SUCCESS);
884 }
885 
886 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
887 {
888   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
889   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
890   PetscMUMPSInt     *row, *col;
891   const PetscScalar *av, *bv, *v1, *v2;
892   PetscScalar       *val;
893   Mat                Ad, Ao;
894   Mat_SeqAIJ        *aa;
895   Mat_SeqAIJ        *bb;
896 #if defined(PETSC_USE_COMPLEX)
897   PetscBool hermitian, isset;
898 #endif
899 
900   PetscFunctionBegin;
901 #if defined(PETSC_USE_COMPLEX)
902   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
903   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
904 #endif
905   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
906   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
907   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
908 
909   aa    = (Mat_SeqAIJ *)(Ad)->data;
910   bb    = (Mat_SeqAIJ *)(Ao)->data;
911   ai    = aa->i;
912   aj    = aa->j;
913   adiag = aa->diag;
914   bi    = bb->i;
915   bj    = bb->j;
916 
917   rstart = A->rmap->rstart;
918 
919   if (reuse == MAT_INITIAL_MATRIX) {
920     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
921     nzb = 0; /* num of upper triangular entries in mat->B */
922     for (i = 0; i < m; i++) {
923       nza += (ai[i + 1] - adiag[i]);
924       countB = bi[i + 1] - bi[i];
925       bjj    = bj + bi[i];
926       for (j = 0; j < countB; j++) {
927         if (garray[bjj[j]] > rstart) nzb++;
928       }
929     }
930 
931     nz = nza + nzb; /* total nz of upper triangular part of mat */
932     PetscCall(PetscMalloc2(nz, &row, nz, &col));
933     PetscCall(PetscMalloc1(nz, &val));
934     mumps->nnz = nz;
935     mumps->irn = row;
936     mumps->jcn = col;
937     mumps->val = mumps->val_alloc = val;
938   } else {
939     val = mumps->val;
940   }
941 
942   jj   = 0;
943   irow = rstart;
944   for (i = 0; i < m; i++) {
945     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
946     v1     = av + adiag[i];
947     countA = ai[i + 1] - adiag[i];
948     countB = bi[i + 1] - bi[i];
949     bjj    = bj + bi[i];
950     v2     = bv + bi[i];
951 
952     /* A-part */
953     for (j = 0; j < countA; j++) {
954       if (reuse == MAT_INITIAL_MATRIX) {
955         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
956         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
957       }
958       val[jj++] = v1[j];
959     }
960 
961     /* B-part */
962     for (j = 0; j < countB; j++) {
963       if (garray[bjj[j]] > rstart) {
964         if (reuse == MAT_INITIAL_MATRIX) {
965           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
966           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
967         }
968         val[jj++] = v2[j];
969       }
970     }
971     irow++;
972   }
973   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
974   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
975   PetscFunctionReturn(PETSC_SUCCESS);
976 }
977 
978 PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
979 {
980   Mat     **mats;
981   PetscInt  nr, nc;
982   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
983 
984   PetscFunctionBegin;
985   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
986   if (reuse == MAT_INITIAL_MATRIX) {
987     PetscMUMPSInt *irns, *jcns;
988     PetscScalar   *vals;
989     PetscInt64     totnnz, cumnnz, maxnnz;
990     PetscInt      *pjcns_w;
991     IS            *rows, *cols;
992     PetscInt     **rows_idx, **cols_idx;
993 
994     cumnnz = 0;
995     maxnnz = 0;
996     PetscCall(PetscMalloc2(nr * nc, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
997     for (PetscInt r = 0; r < nr; r++) {
998       for (PetscInt c = 0; c < nc; c++) {
999         Mat sub = mats[r][c];
1000 
1001         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1002         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1003         if (sub) {
1004           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1005           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ;
1006           MatInfo   info;
1007 
1008           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1009           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1010           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1011           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1012           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1013           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1014 
1015           if (chol) {
1016             if (r == c) {
1017               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1018               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1019               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1020               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1021             } else {
1022               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1023               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1024               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1025               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1026             }
1027           } else {
1028             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1029             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1030             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1031             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1032           }
1033           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1034           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1035           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1036           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1037           maxnnz = PetscMax(maxnnz, info.nz_used);
1038         }
1039       }
1040     }
1041 
1042     /* Allocate total COO */
1043     totnnz = cumnnz;
1044     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1045     PetscCall(PetscMalloc1(totnnz, &vals));
1046 
1047     /* Handle rows and column maps
1048        We directly map rows and use an SF for the columns */
1049     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1050     PetscCall(MatNestGetISs(A, rows, cols));
1051     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1052     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1053     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1054     else (void)(maxnnz);
1055 
1056     cumnnz = 0;
1057     for (PetscInt r = 0; r < nr; r++) {
1058       for (PetscInt c = 0; c < nc; c++) {
1059         Mat             sub  = mats[r][c];
1060         const PetscInt *ridx = rows_idx[r];
1061         PetscInt        rst;
1062         PetscSF         csf;
1063 
1064         mumps->nest_vals_start[r * nc + c] = cumnnz;
1065         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1066 
1067         /* Directly use the MUMPS data structure and use C ordering for now */
1068         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1069 
1070         /* Import values to full COO */
1071         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1072 
1073         /* Direct map of rows */
1074         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1075         for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1076 
1077         /* Communicate column indices */
1078         if (PetscDefined(USE_64BIT_INDICES)) {
1079           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1080         } else {
1081           pjcns_w = (PetscInt *)(jcns + cumnnz); /* This cast is needed only to silence warnings for 64bit integers builds */
1082           PetscCall(PetscArraycpy(pjcns_w, mumps->jcn, mumps->nnz));
1083         }
1084 
1085         /* This could have been done with a single SF but it would have complicated the code a lot. */
1086         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1087         PetscCall(PetscSFSetGraphLayout(csf, sub->cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1088         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE));
1089         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cols_idx[c], pjcns_w, MPI_REPLACE));
1090         PetscCall(PetscSFDestroy(&csf));
1091 
1092         /* Apply Fortran index shifting for columns */
1093         for (PetscInt k = 0; k < mumps->nnz; k++) PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1094 
1095         /* Shift new starting point and sanity check */
1096         cumnnz += mumps->nnz;
1097         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1098 
1099         /* Free scratch memory */
1100         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1101         PetscCall(PetscFree(mumps->val_alloc));
1102         mumps->val = NULL;
1103         mumps->nnz = 0;
1104       }
1105     }
1106     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1107     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1108     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1109     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1110     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1111 
1112     /* Set pointers for final MUMPS data structure */
1113     mumps->nest_vals = vals;
1114     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1115     mumps->val       = vals;
1116     mumps->irn       = irns;
1117     mumps->jcn       = jcns;
1118     mumps->nnz       = cumnnz;
1119   } else {
1120     PetscScalar *oval = mumps->nest_vals;
1121     for (PetscInt r = 0; r < nr; r++) {
1122       for (PetscInt c = 0; c < nc; c++) {
1123         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1124         mumps->val = oval + mumps->nest_vals_start[r * nc + c];
1125         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(mats[r][c], shift, MAT_REUSE_MATRIX, mumps));
1126       }
1127     }
1128     mumps->val = oval;
1129   }
1130   PetscFunctionReturn(PETSC_SUCCESS);
1131 }
1132 
1133 PetscErrorCode MatDestroy_MUMPS(Mat A)
1134 {
1135   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1136 
1137   PetscFunctionBegin;
1138   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1139   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1140   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1141   PetscCall(VecDestroy(&mumps->b_seq));
1142   PetscCall(VecDestroy(&mumps->x_seq));
1143   PetscCall(PetscFree(mumps->id.perm_in));
1144   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1145   PetscCall(PetscFree(mumps->val_alloc));
1146   PetscCall(PetscFree(mumps->info));
1147   PetscCall(PetscFree(mumps->ICNTL_pre));
1148   PetscCall(PetscFree(mumps->CNTL_pre));
1149   PetscCall(MatMumpsResetSchur_Private(mumps));
1150   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1151     mumps->id.job = JOB_END;
1152     PetscMUMPS_c(mumps);
1153     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1154     if (mumps->mumps_comm != MPI_COMM_NULL) {
1155       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1156       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1157     }
1158   }
1159 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1160   if (mumps->use_petsc_omp_support) {
1161     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1162     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1163     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1164   }
1165 #endif
1166   PetscCall(PetscFree(mumps->ia_alloc));
1167   PetscCall(PetscFree(mumps->ja_alloc));
1168   PetscCall(PetscFree(mumps->recvcount));
1169   PetscCall(PetscFree(mumps->reqs));
1170   PetscCall(PetscFree(mumps->irhs_loc));
1171   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1172   PetscCall(PetscFree(mumps->nest_vals));
1173   PetscCall(PetscFree(A->data));
1174 
1175   /* clear composed functions */
1176   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1179   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1180   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1181   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1183   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1184   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1189   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1194 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1195 {
1196   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1197   const PetscMPIInt ompsize = mumps->omp_comm_size;
1198   PetscInt          i, m, M, rstart;
1199 
1200   PetscFunctionBegin;
1201   PetscCall(MatGetSize(A, &M, NULL));
1202   PetscCall(MatGetLocalSize(A, &m, NULL));
1203   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1204   if (ompsize == 1) {
1205     if (!mumps->irhs_loc) {
1206       mumps->nloc_rhs = m;
1207       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1208       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1209       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1210     }
1211     mumps->id.rhs_loc = (MumpsScalar *)array;
1212   } else {
1213 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1214     const PetscInt *ranges;
1215     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1216     MPI_Group       petsc_group, omp_group;
1217     PetscScalar    *recvbuf = NULL;
1218 
1219     if (mumps->is_omp_master) {
1220       /* Lazily initialize the omp stuff for distributed rhs */
1221       if (!mumps->irhs_loc) {
1222         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1223         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1224         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1225         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1226         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1227         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1228 
1229         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1230         mumps->nloc_rhs = 0;
1231         PetscCall(MatGetOwnershipRanges(A, &ranges));
1232         for (j = 0; j < ompsize; j++) {
1233           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1234           mumps->nloc_rhs += mumps->rhs_nrow[j];
1235         }
1236         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1237         for (j = k = 0; j < ompsize; j++) {
1238           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1239         }
1240 
1241         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1242         PetscCallMPI(MPI_Group_free(&petsc_group));
1243         PetscCallMPI(MPI_Group_free(&omp_group));
1244       }
1245 
1246       /* Realloc buffers when the current nrhs is bigger than any we have encountered before */
1247       if (nrhs > mumps->max_nrhs) {
1248         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1249         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1250         mumps->max_nrhs = nrhs;
1251       }
1252 
1253       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1254       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1255       mumps->rhs_disps[0] = 0;
1256       for (j = 1; j < ompsize; j++) {
1257         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1258         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1259       }
1260       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1261     }
1262 
1263     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1264     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1265 
1266     if (mumps->is_omp_master) {
1267       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1268         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1269         for (j = 0; j < ompsize; j++) {
1270           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1271           dst                    = dstbase;
1272           for (i = 0; i < nrhs; i++) {
1273             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1274             src += mumps->rhs_nrow[j];
1275             dst += mumps->nloc_rhs;
1276           }
1277           dstbase += mumps->rhs_nrow[j];
1278         }
1279       }
1280       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1281     }
1282 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1283   }
1284   mumps->id.nrhs     = nrhs;
1285   mumps->id.nloc_rhs = mumps->nloc_rhs;
1286   mumps->id.lrhs_loc = mumps->nloc_rhs;
1287   mumps->id.irhs_loc = mumps->irhs_loc;
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
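/* Example (illustrative, the ompsize == 1 path): with two MPI ranks owning rows [0,3) and [3,5) of A, the
   routine above sets, in MUMPS' 1-based numbering,

       rank 0: nloc_rhs = 3, irhs_loc = {1, 2, 3}
       rank 1: nloc_rhs = 2, irhs_loc = {4, 5}

   and id.rhs_loc points at the caller-provided local dense RHS array of size nloc_rhs * nrhs. */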
1290 
1291 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1292 {
1293   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1294   const PetscScalar *rarray = NULL;
1295   PetscScalar       *array;
1296   IS                 is_iden, is_petsc;
1297   PetscInt           i;
1298   PetscBool          second_solve = PETSC_FALSE;
1299   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1300 
1301   PetscFunctionBegin;
1302   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1303                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1304                                    &cite1));
1305   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1306                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1307                                    &cite2));
1308 
1309   if (A->factorerrortype) {
1310     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1311     PetscCall(VecSetInf(x));
1312     PetscFunctionReturn(PETSC_SUCCESS);
1313   }
1314 
1315   mumps->id.nrhs = 1;
1316   if (mumps->petsc_size > 1) {
1317     if (mumps->ICNTL20 == 10) {
1318       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1319       PetscCall(VecGetArrayRead(b, &rarray));
1320       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1321     } else {
1322       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1323       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1324       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1325       if (!mumps->myid) {
1326         PetscCall(VecGetArray(mumps->b_seq, &array));
1327         mumps->id.rhs = (MumpsScalar *)array;
1328       }
1329     }
1330   } else {                   /* petsc_size == 1 */
1331     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1332     PetscCall(VecCopy(b, x));
1333     PetscCall(VecGetArray(x, &array));
1334     mumps->id.rhs = (MumpsScalar *)array;
1335   }
1336 
1337   /*
1338      handle condensation step of Schur complement (if any)
1339      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1340      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1341      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1342      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1343   */
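  /* Illustrative usage from the caller side (not taken from this file): after providing the Schur indices with
     MatFactorSetSchurIS(), a user who wants only the condensed solve would typically set -mat_mumps_icntl_26 to 1
     (reduction/condensation) or 2 (expansion); with the default ICNTL(26) = -1 the branch below solves the full
     system by performing both steps internally through MatMumpsHandleSchur_Private(). */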
1344   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1345     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1346     second_solve = PETSC_TRUE;
1347     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1348   }
1349   /* solve phase */
1350   mumps->id.job = JOB_SOLVE;
1351   PetscMUMPS_c(mumps);
1352   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1353 
1354   /* handle expansion step of Schur complement (if any) */
1355   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1356 
1357   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1358     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1359       /* when id.ICNTL(9) changes, the contents of isol_loc may change (but not its size, lsol_loc), so recreate scat_sol */
1360       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1361     }
1362     if (!mumps->scat_sol) { /* create scatter scat_sol */
1363       PetscInt *isol2_loc = NULL;
1364       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1365       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1366       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1367       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1368       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1369       PetscCall(ISDestroy(&is_iden));
1370       PetscCall(ISDestroy(&is_petsc));
1371       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1372     }
1373 
1374     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1375     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1376   }
1377 
1378   if (mumps->petsc_size > 1) {
1379     if (mumps->ICNTL20 == 10) {
1380       PetscCall(VecRestoreArrayRead(b, &rarray));
1381     } else if (!mumps->myid) {
1382       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1383     }
1384   } else PetscCall(VecRestoreArray(x, &array));
1385 
1386   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1387   PetscFunctionReturn(PETSC_SUCCESS);
1388 }
1389 
1390 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1391 {
1392   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1393 
1394   PetscFunctionBegin;
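  /* MUMPS ICNTL(9) selects the system to solve: 1 (the default) solves A x = b, any other value solves A^T x = b,
     hence the temporary toggle around MatSolve_MUMPS() below */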
1395   mumps->id.ICNTL(9) = 0;
1396   PetscCall(MatSolve_MUMPS(A, b, x));
1397   mumps->id.ICNTL(9) = 1;
1398   PetscFunctionReturn(PETSC_SUCCESS);
1399 }
1400 
1401 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1402 {
1403   Mat                Bt = NULL;
1404   PetscBool          denseX, denseB, flg, flgT;
1405   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1406   PetscInt           i, nrhs, M;
1407   PetscScalar       *array;
1408   const PetscScalar *rbray;
1409   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1410   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1411   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1412   IS                 is_to, is_from;
1413   PetscInt           k, proc, j, m, myrstart;
1414   const PetscInt    *rstart;
1415   Vec                v_mpi, msol_loc;
1416   VecScatter         scat_sol;
1417   Vec                b_seq;
1418   VecScatter         scat_rhs;
1419   PetscScalar       *aa;
1420   PetscInt           spnr, *ia, *ja;
1421   Mat_MPIAIJ        *b = NULL;
1422 
1423   PetscFunctionBegin;
1424   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1425   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1426 
1427   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1428   if (denseB) {
1429     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1430     mumps->id.ICNTL(20) = 0; /* dense RHS */
1431   } else {                   /* sparse B */
1432     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1433     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1434     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1435                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1436       PetscCall(MatTransposeGetMat(B, &Bt));
1437     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1438     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1439   }
1440 
1441   PetscCall(MatGetSize(B, &M, &nrhs));
1442   mumps->id.nrhs = nrhs;
1443   mumps->id.lrhs = M;
1444   mumps->id.rhs  = NULL;
1445 
1446   if (mumps->petsc_size == 1) {
1447     PetscScalar *aa;
1448     PetscInt     spnr, *ia, *ja;
1449     PetscBool    second_solve = PETSC_FALSE;
1450 
1451     PetscCall(MatDenseGetArray(X, &array));
1452     mumps->id.rhs = (MumpsScalar *)array;
1453 
1454     if (denseB) {
1455       /* copy B to X */
1456       PetscCall(MatDenseGetArrayRead(B, &rbray));
1457       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1458       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1459     } else { /* sparse B */
1460       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1461       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1462       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1463       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1464       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1465     }
1466     /* handle condensation step of Schur complement (if any) */
1467     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1468       second_solve = PETSC_TRUE;
1469       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1470     }
1471     /* solve phase */
1472     mumps->id.job = JOB_SOLVE;
1473     PetscMUMPS_c(mumps);
1474     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1475 
1476     /* handle expansion step of Schur complement (if any) */
1477     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1478     if (!denseB) { /* sparse B */
1479       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1480       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1481       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1482     }
1483     PetscCall(MatDenseRestoreArray(X, &array));
1484     PetscFunctionReturn(PETSC_SUCCESS);
1485   }
1486 
1487   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1488   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1489 
1490   /* create msol_loc to hold mumps local solution */
1491   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1492   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1493 
1494   lsol_loc  = mumps->id.lsol_loc;
1495   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1496   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1497   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1498   mumps->id.isol_loc = isol_loc;
1499 
1500   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1501 
1502   if (denseB) {
1503     if (mumps->ICNTL20 == 10) {
1504       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1505       PetscCall(MatDenseGetArrayRead(B, &rbray));
1506       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1507       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1508       PetscCall(MatGetLocalSize(B, &m, NULL));
1509       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1510     } else {
1511       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1512       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created here is not implemented as an
1513         MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero
1514         to gather B to rank 0 and then re-arrange B into the desired order on rank 0, a purely local operation (see the sketch below).
1515       */
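      /* A minimal sketch of that optimization (assumed, not implemented here; 'scat' is a hypothetical VecScatter):
           PetscCall(VecScatterCreateToZero(v_mpi, &scat, &b_seq));
           PetscCall(VecScatterBegin(scat, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
           PetscCall(VecScatterEnd(scat, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
         followed by a purely local permutation on rank 0 that re-orders b_seq from per-rank (column, row) blocks into
         the global column-major layout expected by MUMPS before assigning mumps->id.rhs. */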
1516 
1517       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1518       /* wrap dense rhs matrix B into a vector v_mpi */
1519       PetscCall(MatGetLocalSize(B, &m, NULL));
1520       PetscCall(MatDenseGetArray(B, &bray));
1521       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1522       PetscCall(MatDenseRestoreArray(B, &bray));
1523 
1524       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1525       if (!mumps->myid) {
1526         PetscInt *idx;
1527         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1528         PetscCall(PetscMalloc1(nrhs * M, &idx));
1529         PetscCall(MatGetOwnershipRanges(B, &rstart));
1530         k = 0;
1531         for (proc = 0; proc < mumps->petsc_size; proc++) {
1532           for (j = 0; j < nrhs; j++) {
1533             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1534           }
1535         }
1536 
1537         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1538         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1539         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1540       } else {
1541         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1542         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1543         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1544       }
1545       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1546       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1547       PetscCall(ISDestroy(&is_to));
1548       PetscCall(ISDestroy(&is_from));
1549       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1550 
1551       if (!mumps->myid) { /* define rhs on the host */
1552         PetscCall(VecGetArray(b_seq, &bray));
1553         mumps->id.rhs = (MumpsScalar *)bray;
1554         PetscCall(VecRestoreArray(b_seq, &bray));
1555       }
1556     }
1557   } else { /* sparse B */
1558     b = (Mat_MPIAIJ *)Bt->data;
1559 
1560     /* wrap dense X into a vector v_mpi */
1561     PetscCall(MatGetLocalSize(X, &m, NULL));
1562     PetscCall(MatDenseGetArray(X, &bray));
1563     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1564     PetscCall(MatDenseRestoreArray(X, &bray));
1565 
1566     if (!mumps->myid) {
1567       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1568       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1569       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1570       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1571       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1572     } else {
1573       mumps->id.irhs_ptr    = NULL;
1574       mumps->id.irhs_sparse = NULL;
1575       mumps->id.nz_rhs      = 0;
1576       mumps->id.rhs_sparse  = NULL;
1577     }
1578   }
1579 
1580   /* solve phase */
1581   mumps->id.job = JOB_SOLVE;
1582   PetscMUMPS_c(mumps);
1583   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1584 
1585   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1586   PetscCall(MatDenseGetArray(X, &array));
1587   PetscCall(VecPlaceArray(v_mpi, array));
1588 
1589   /* create scatter scat_sol */
1590   PetscCall(MatGetOwnershipRanges(X, &rstart));
1591   /* iidx: index for scatter mumps solution to petsc X */
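  /* Illustrative example of the mapping built below: with two ranks owning rows [0,3) and [3,5) of X and nrhs = 2,
     v_mpi (wrapping X's local arrays) stores rank 0's rows column by column (positions 0..5) followed by rank 1's
     (positions 6..9); a MUMPS-owned global row r on rank proc thus maps to iidx = (r - rstart[proc]) + rstart[proc] * nrhs
     for the first column, and each further column j adds j * m, where m = rstart[proc + 1] - rstart[proc]. */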
1592 
1593   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1594   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1595   for (i = 0; i < lsol_loc; i++) {
1596     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1597 
1598     for (proc = 0; proc < mumps->petsc_size; proc++) {
1599       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1600         myrstart = rstart[proc];
1601         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1602         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1603         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1604         break;
1605       }
1606     }
1607 
1608     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1609   }
1610   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1611   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1612   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1613   PetscCall(ISDestroy(&is_from));
1614   PetscCall(ISDestroy(&is_to));
1615   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1616   PetscCall(MatDenseRestoreArray(X, &array));
1617 
1618   /* free spaces */
1619   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1620   mumps->id.isol_loc = isol_loc_save;
1621 
1622   PetscCall(PetscFree2(sol_loc, isol_loc));
1623   PetscCall(PetscFree(idxx));
1624   PetscCall(VecDestroy(&msol_loc));
1625   PetscCall(VecDestroy(&v_mpi));
1626   if (!denseB) {
1627     if (!mumps->myid) {
1628       b = (Mat_MPIAIJ *)Bt->data;
1629       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1630       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1631       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1632     }
1633   } else {
1634     if (mumps->ICNTL20 == 0) {
1635       PetscCall(VecDestroy(&b_seq));
1636       PetscCall(VecScatterDestroy(&scat_rhs));
1637     }
1638   }
1639   PetscCall(VecScatterDestroy(&scat_sol));
1640   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1641   PetscFunctionReturn(PETSC_SUCCESS);
1642 }
1643 
1644 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1645 {
1646   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1647   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1648 
1649   PetscFunctionBegin;
1650   mumps->id.ICNTL(9) = 0;
1651   PetscCall(MatMatSolve_MUMPS(A, B, X));
1652   mumps->id.ICNTL(9) = oldvalue;
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1657 {
1658   PetscBool flg;
1659   Mat       B;
1660 
1661   PetscFunctionBegin;
1662   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1663   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1664 
1665   /* Create B=Bt^T that uses Bt's data structure */
1666   PetscCall(MatCreateTranspose(Bt, &B));
1667 
1668   PetscCall(MatMatSolve_MUMPS(A, B, X));
1669   PetscCall(MatDestroy(&B));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 #if !defined(PETSC_USE_COMPLEX)
1674 /*
1675   input:
1676    F:        numeric factor
1677   output:
1678    nneg:     total number of negative pivots
1679    nzero:    total number of zero pivots
1680    npos:     (global dimension of F) - nneg - nzero
1681 */
1682 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1683 {
1684   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1685   PetscMPIInt size;
1686 
1687   PetscFunctionBegin;
1688   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1689   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1690   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1691 
1692   if (nneg) *nneg = mumps->id.INFOG(12);
1693   if (nzero || npos) {
1694     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1695     if (nzero) *nzero = mumps->id.INFOG(28);
1696     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1697   }
1698   PetscFunctionReturn(PETSC_SUCCESS);
1699 }
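/* Illustrative caller-side sketch (not part of this file): after a MUMPS Cholesky factorization run with
   -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1, the inertia is typically queried through the generic interface
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));
   which dispatches to the routine above via F->ops->getinertia. */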
1700 #endif
1701 
1702 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1703 {
1704   PetscInt       i, nreqs;
1705   PetscMUMPSInt *irn, *jcn;
1706   PetscMPIInt    count;
1707   PetscInt64     totnnz, remain;
1708   const PetscInt osize = mumps->omp_comm_size;
1709   PetscScalar   *val;
1710 
1711   PetscFunctionBegin;
1712   if (osize > 1) {
1713     if (reuse == MAT_INITIAL_MATRIX) {
1714       /* master first gathers counts of nonzeros to receive */
1715       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1716       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1717 
1718       /* Then each computes number of send/recvs */
1719       if (mumps->is_omp_master) {
1720         /* Start from 1 since self communication is not done via MPI */
1721         nreqs = 0;
1722         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1723       } else {
1724         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1725       }
1726       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1727 
1728       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1729          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1730          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1731          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1732        */
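      /* Worked example with illustrative numbers: if a rank owns nnz = 5e9 entries and PETSC_MPI_INT_MAX = 2^31-1,
         each of its irn/jcn/val arrays is sent in three slices of 2147483647, 2147483647 and 705032706 entries, so
         every MPI count argument fits in an int. */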
1733       nreqs = 0; /* counter for actual send/recvs */
1734       if (mumps->is_omp_master) {
1735         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1736         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1737         PetscCall(PetscMalloc1(totnnz, &val));
1738 
1739         /* Self communication */
1740         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1741         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1742         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1743 
1744         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1745         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1746         PetscCall(PetscFree(mumps->val_alloc));
1747         mumps->nnz = totnnz;
1748         mumps->irn = irn;
1749         mumps->jcn = jcn;
1750         mumps->val = mumps->val_alloc = val;
1751 
1752         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1753         jcn += mumps->recvcount[0];
1754         val += mumps->recvcount[0];
1755 
1756         /* Remote communication */
1757         for (i = 1; i < osize; i++) {
1758           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1759           remain = mumps->recvcount[i] - count;
1760           while (count > 0) {
1761             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1762             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1763             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1764             irn += count;
1765             jcn += count;
1766             val += count;
1767             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1768             remain -= count;
1769           }
1770         }
1771       } else {
1772         irn    = mumps->irn;
1773         jcn    = mumps->jcn;
1774         val    = mumps->val;
1775         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1776         remain = mumps->nnz - count;
1777         while (count > 0) {
1778           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1779           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1780           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1781           irn += count;
1782           jcn += count;
1783           val += count;
1784           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1785           remain -= count;
1786         }
1787       }
1788     } else {
1789       nreqs = 0;
1790       if (mumps->is_omp_master) {
1791         val = mumps->val + mumps->recvcount[0];
1792         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1793           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1794           remain = mumps->recvcount[i] - count;
1795           while (count > 0) {
1796             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1797             val += count;
1798             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1799             remain -= count;
1800           }
1801         }
1802       } else {
1803         val    = mumps->val;
1804         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1805         remain = mumps->nnz - count;
1806         while (count > 0) {
1807           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1808           val += count;
1809           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1810           remain -= count;
1811         }
1812       }
1813     }
1814     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1815     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1816   }
1817   PetscFunctionReturn(PETSC_SUCCESS);
1818 }
1819 
1820 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1821 {
1822   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1823   PetscBool  isMPIAIJ;
1824 
1825   PetscFunctionBegin;
1826   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1827     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1828     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1829     PetscFunctionReturn(PETSC_SUCCESS);
1830   }
1831 
1832   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1833   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1834 
1835   /* numerical factorization phase */
1836   mumps->id.job = JOB_FACTNUMERIC;
1837   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1838     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1839   } else {
1840     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1841   }
1842   PetscMUMPS_c(mumps);
1843   if (mumps->id.INFOG(1) < 0) {
1844     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1845     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1846       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1847       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1848     } else if (mumps->id.INFOG(1) == -13) {
1849       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1850       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1851     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1852       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1853       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1854     } else {
1855       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1856       F->factorerrortype = MAT_FACTOR_OTHER;
1857     }
1858   }
1859   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1860 
1861   F->assembled = PETSC_TRUE;
1862 
1863   if (F->schur) { /* reset Schur status to unfactored */
1864 #if defined(PETSC_HAVE_CUDA)
1865     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1866 #endif
1867     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1868       mumps->id.ICNTL(19) = 2;
1869       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1870     }
1871     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1872   }
1873 
1874   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1875   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1876 
1877   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1878   if (mumps->petsc_size > 1) {
1879     PetscInt     lsol_loc;
1880     PetscScalar *sol_loc;
1881 
1882     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1883 
1884     /* distributed solution; Create x_seq=sol_loc for repeated use */
1885     if (mumps->x_seq) {
1886       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1887       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1888       PetscCall(VecDestroy(&mumps->x_seq));
1889     }
1890     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1891     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1892     mumps->id.lsol_loc = lsol_loc;
1893     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1894     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1895   }
1896   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1897   PetscFunctionReturn(PETSC_SUCCESS);
1898 }
1899 
1900 /* Sets MUMPS options from the options database */
1901 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1902 {
1903   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1904   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1905   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1906   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1907   MumpsScalar  *arr;
1908 
1909   PetscFunctionBegin;
1910   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1911   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1912     PetscInt nthreads   = 0;
1913     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1914     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
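    /* ICNTL_pre[]/CNTL_pre[] cache control values the user set before MUMPS was initialized: slot 0 holds the number
       of (index, value) pairs stored pairwise after it; they are replayed right after the JOB_INIT call below. */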
1915 
1916     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1917     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1918     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1919 
1920     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1921     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
1922     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1923     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1924     if (mumps->use_petsc_omp_support) {
1925       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1926                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1927       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1928 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1929       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1930       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1931 #endif
1932     } else {
1933       mumps->omp_comm      = PETSC_COMM_SELF;
1934       mumps->mumps_comm    = mumps->petsc_comm;
1935       mumps->is_omp_master = PETSC_TRUE;
1936     }
1937     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1938     mumps->reqs = NULL;
1939     mumps->tag  = 0;
1940 
1941     if (mumps->mumps_comm != MPI_COMM_NULL) {
1942       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1943         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1944         MPI_Comm comm;
1945         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1946         mumps->mumps_comm = comm;
1947       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1948     }
1949 
1950     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1951     mumps->id.job          = JOB_INIT;
1952     mumps->id.par          = 1; /* host participates in factorization and solve */
1953     mumps->id.sym          = mumps->sym;
1954 
1955     size          = mumps->id.size_schur;
1956     arr           = mumps->id.schur;
1957     listvar_schur = mumps->id.listvar_schur;
1958     PetscMUMPS_c(mumps);
1959     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1960     /* restore cached ICNTL and CNTL values */
1961     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1962     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1963     PetscCall(PetscFree(mumps->ICNTL_pre));
1964     PetscCall(PetscFree(mumps->CNTL_pre));
1965 
1966     if (schur) {
1967       mumps->id.size_schur    = size;
1968       mumps->id.schur_lld     = size;
1969       mumps->id.schur         = arr;
1970       mumps->id.listvar_schur = listvar_schur;
1971       if (mumps->petsc_size > 1) {
1972         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1973 
1974         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1975         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1976         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1977         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1978       } else {
1979         if (F->factortype == MAT_FACTOR_LU) {
1980           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1981         } else {
1982           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1983         }
1984       }
1985       mumps->id.ICNTL(26) = -1;
1986     }
1987 
1988     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1989        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1990      */
1991     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1992     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1993 
1994     mumps->scat_rhs = NULL;
1995     mumps->scat_sol = NULL;
1996 
1997     /* set PETSc-MUMPS default options - override MUMPS default */
1998     mumps->id.ICNTL(3) = 0;
1999     mumps->id.ICNTL(4) = 0;
2000     if (mumps->petsc_size == 1) {
2001       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2002       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2003     } else {
2004       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2005       mumps->id.ICNTL(21) = 1; /* distributed solution */
2006     }
2007   }
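  /* Illustrative command line exercising the options registered below (assumed typical usage, not from this file):
       ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps \
             -mat_mumps_icntl_14 30 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
     each -mat_mumps_* option overrides the corresponding MUMPS control stored on the factor matrix. */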
2008   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2009   if (flg) mumps->id.ICNTL(1) = icntl;
2010   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2011   if (flg) mumps->id.ICNTL(2) = icntl;
2012   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2013   if (flg) mumps->id.ICNTL(3) = icntl;
2014 
2015   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2016   if (flg) mumps->id.ICNTL(4) = icntl;
2017   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2018 
2019   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2020   if (flg) mumps->id.ICNTL(6) = icntl;
2021 
2022   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2023   if (flg) {
2024     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2025     mumps->id.ICNTL(7) = icntl;
2026   }
2027 
2028   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2029   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2030   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2031   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2032   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2033   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2034   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2035   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2036   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2037   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2038   if (flg) {
2039     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2040     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2041   }
2042   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2043   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
2044     PetscCall(MatDestroy(&F->schur));
2045     PetscCall(MatMumpsResetSchur_Private(mumps));
2046   }
2047 
2048   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2049      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2050      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2051      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2052      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2053      In short, we could not use distributed RHS with MPICH until v4.0b1.
2054    */
2055 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2056   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
2057 #else
2058   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
2059 #endif
2060   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2061   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2062 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2063   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2064 #endif
2065   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2066 
2067   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2068   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2069   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2070   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2071 
2072   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2073   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2074   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2075   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2076   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2077   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2078   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2079   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2080   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2081   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2082   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2083   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2084 
2085   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2086   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2087   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2088   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2089   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2090   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2091 
2092   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2093 
2094   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2095   if (ninfo) {
2096     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2097     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2098     mumps->ninfo = ninfo;
2099     for (i = 0; i < ninfo; i++) {
2100       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2101       mumps->info[i] = info[i];
2102     }
2103   }
2104   PetscOptionsEnd();
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
2108 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2109 {
2110   PetscFunctionBegin;
2111   if (mumps->id.INFOG(1) < 0) {
2112     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
2113     if (mumps->id.INFOG(1) == -6) {
2114       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2115       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2116     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2117       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2118       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2119     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
2120       PetscCall(PetscInfo(F, "Empty matrix\n"));
2121     } else {
2122       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2123       F->factorerrortype = MAT_FACTOR_OTHER;
2124     }
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2130 {
2131   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2132   Vec            b;
2133   const PetscInt M = A->rmap->N;
2134 
2135   PetscFunctionBegin;
2136   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2137     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2138     PetscFunctionReturn(PETSC_SUCCESS);
2139   }
2140 
2141   /* Set MUMPS options from the options database */
2142   PetscCall(MatSetFromOptions_MUMPS(F, A));
2143 
2144   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2145   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2146 
2147   /* analysis phase */
2148   mumps->id.job = JOB_FACTSYMBOLIC;
2149   mumps->id.n   = M;
2150   switch (mumps->id.ICNTL(18)) {
2151   case 0: /* centralized assembled matrix input */
2152     if (!mumps->myid) {
2153       mumps->id.nnz = mumps->nnz;
2154       mumps->id.irn = mumps->irn;
2155       mumps->id.jcn = mumps->jcn;
2156       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2157       if (r) {
2158         mumps->id.ICNTL(7) = 1;
2159         if (!mumps->myid) {
2160           const PetscInt *idx;
2161           PetscInt        i;
2162 
2163           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2164           PetscCall(ISGetIndices(r, &idx));
2165           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2166           PetscCall(ISRestoreIndices(r, &idx));
2167         }
2168       }
2169     }
2170     break;
2171   case 3: /* distributed assembled matrix input (size>1) */
2172     mumps->id.nnz_loc = mumps->nnz;
2173     mumps->id.irn_loc = mumps->irn;
2174     mumps->id.jcn_loc = mumps->jcn;
2175     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2176     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2177       PetscCall(MatCreateVecs(A, NULL, &b));
2178       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2179       PetscCall(VecDestroy(&b));
2180     }
2181     break;
2182   }
2183   PetscMUMPS_c(mumps);
2184   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2185 
2186   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2187   F->ops->solve             = MatSolve_MUMPS;
2188   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2189   F->ops->matsolve          = MatMatSolve_MUMPS;
2190   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2191   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2192 
2193   mumps->matstruc = SAME_NONZERO_PATTERN;
2194   PetscFunctionReturn(PETSC_SUCCESS);
2195 }
2196 
2197 /* Note the Petsc r and c permutations are ignored */
2198 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2199 {
2200   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2201   Vec            b;
2202   const PetscInt M = A->rmap->N;
2203 
2204   PetscFunctionBegin;
2205   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2206     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2207     PetscFunctionReturn(PETSC_SUCCESS);
2208   }
2209 
2210   /* Set MUMPS options from the options database */
2211   PetscCall(MatSetFromOptions_MUMPS(F, A));
2212 
2213   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2214   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2215 
2216   /* analysis phase */
2217   mumps->id.job = JOB_FACTSYMBOLIC;
2218   mumps->id.n   = M;
2219   switch (mumps->id.ICNTL(18)) {
2220   case 0: /* centralized assembled matrix input */
2221     if (!mumps->myid) {
2222       mumps->id.nnz = mumps->nnz;
2223       mumps->id.irn = mumps->irn;
2224       mumps->id.jcn = mumps->jcn;
2225       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2226     }
2227     break;
2228   case 3: /* distributed assembled matrix input (size>1) */
2229     mumps->id.nnz_loc = mumps->nnz;
2230     mumps->id.irn_loc = mumps->irn;
2231     mumps->id.jcn_loc = mumps->jcn;
2232     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2233     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2234       PetscCall(MatCreateVecs(A, NULL, &b));
2235       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2236       PetscCall(VecDestroy(&b));
2237     }
2238     break;
2239   }
2240   PetscMUMPS_c(mumps);
2241   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2242 
2243   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2244   F->ops->solve             = MatSolve_MUMPS;
2245   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2246   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2247 
2248   mumps->matstruc = SAME_NONZERO_PATTERN;
2249   PetscFunctionReturn(PETSC_SUCCESS);
2250 }
2251 
2252 /* Note the Petsc r permutation and factor info are ignored */
2253 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2254 {
2255   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2256   Vec            b;
2257   const PetscInt M = A->rmap->N;
2258 
2259   PetscFunctionBegin;
2260   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2261     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2262     PetscFunctionReturn(PETSC_SUCCESS);
2263   }
2264 
2265   /* Set MUMPS options from the options database */
2266   PetscCall(MatSetFromOptions_MUMPS(F, A));
2267 
2268   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2269   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2270 
2271   /* analysis phase */
2272   mumps->id.job = JOB_FACTSYMBOLIC;
2273   mumps->id.n   = M;
2274   switch (mumps->id.ICNTL(18)) {
2275   case 0: /* centralized assembled matrix input */
2276     if (!mumps->myid) {
2277       mumps->id.nnz = mumps->nnz;
2278       mumps->id.irn = mumps->irn;
2279       mumps->id.jcn = mumps->jcn;
2280       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2281     }
2282     break;
2283   case 3: /* distributed assembled matrix input (size>1) */
2284     mumps->id.nnz_loc = mumps->nnz;
2285     mumps->id.irn_loc = mumps->irn;
2286     mumps->id.jcn_loc = mumps->jcn;
2287     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2288     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2289       PetscCall(MatCreateVecs(A, NULL, &b));
2290       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2291       PetscCall(VecDestroy(&b));
2292     }
2293     break;
2294   }
2295   PetscMUMPS_c(mumps);
2296   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2297 
2298   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2299   F->ops->solve                 = MatSolve_MUMPS;
2300   F->ops->solvetranspose        = MatSolve_MUMPS;
2301   F->ops->matsolve              = MatMatSolve_MUMPS;
2302   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2303   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2304 #if defined(PETSC_USE_COMPLEX)
2305   F->ops->getinertia = NULL;
2306 #else
2307   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2308 #endif
2309 
2310   mumps->matstruc = SAME_NONZERO_PATTERN;
2311   PetscFunctionReturn(PETSC_SUCCESS);
2312 }
2313 
2314 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2315 {
2316   PetscBool         iascii;
2317   PetscViewerFormat format;
2318   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2319 
2320   PetscFunctionBegin;
2321   /* check if matrix is mumps type */
2322   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2323 
2324   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2325   if (iascii) {
2326     PetscCall(PetscViewerGetFormat(viewer, &format));
2327     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2328       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2329       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2330         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2331         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2332         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2333         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2334         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2335         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2336         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2337         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2338         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2339         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2340         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2341         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2342         if (mumps->id.ICNTL(11) > 0) {
2343           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2344           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2345           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2346           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2347           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2348           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2349         }
2350         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2351         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2352         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2353         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2354         /* ICNTL(16), ICNTL(17) not used */
2355         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2356         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2357         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2358         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2359         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2360         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2361 
2362         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2363         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2364         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2365         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2366         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2367         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2368 
2369         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2370         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors can be discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
2371         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2372         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2373         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2374         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2375 
2376         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2377         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2378         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2379         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2380         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2381         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2382 
2383         /* information local to each processor */
2384         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2385         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2386         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2387         PetscCall(PetscViewerFlush(viewer));
2388         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2389         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2390         PetscCall(PetscViewerFlush(viewer));
2391         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2392         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2393         PetscCall(PetscViewerFlush(viewer));
2394 
2395         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2396         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2397         PetscCall(PetscViewerFlush(viewer));
2398 
2399         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2400         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2401         PetscCall(PetscViewerFlush(viewer));
2402 
2403         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (number of pivots eliminated on this processor after factorization):\n"));
2404         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2405         PetscCall(PetscViewerFlush(viewer));
2406 
2407         if (mumps->ninfo && mumps->ninfo <= 80) {
2408           PetscInt i;
2409           for (i = 0; i < mumps->ninfo; i++) {
2410             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2411             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2412             PetscCall(PetscViewerFlush(viewer));
2413           }
2414         }
2415         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2416       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2417 
2418       if (mumps->myid == 0) { /* information from the host */
2419         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2420         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2421         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2422         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2423 
2424         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2425         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2426         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2427         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2428         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2429         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2430         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2431         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2432         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2433         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2434         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2435         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2436         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2437         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2438         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2439         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2440         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2441         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2442         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2443         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2444         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2445         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2446         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2447         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2448         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2449         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2450         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2451         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2452         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2453         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2454         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2455         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2456         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2457         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2458       }
2459     }
2460   }
2461   PetscFunctionReturn(PETSC_SUCCESS);
2462 }
2463 
2464 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2465 {
2466   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2467 
2468   PetscFunctionBegin;
2469   info->block_size        = 1.0;
2470   info->nz_allocated      = mumps->id.INFOG(20);
2471   info->nz_used           = mumps->id.INFOG(20);
2472   info->nz_unneeded       = 0.0;
2473   info->assemblies        = 0.0;
2474   info->mallocs           = 0.0;
2475   info->memory            = 0.0;
2476   info->fill_ratio_given  = 0;
2477   info->fill_ratio_needed = 0;
2478   info->factor_mallocs    = 0;
2479   PetscFunctionReturn(PETSC_SUCCESS);
2480 }
2481 
2482 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2483 {
2484   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2485   const PetscScalar *arr;
2486   const PetscInt    *idxs;
2487   PetscInt           size, i;
2488 
2489   PetscFunctionBegin;
2490   PetscCall(ISGetLocalSize(is, &size));
2491   /* Schur complement matrix */
2492   PetscCall(MatDestroy(&F->schur));
2493   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2494   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2495   mumps->id.schur      = (MumpsScalar *)arr;
2496   mumps->id.size_schur = size;
2497   mumps->id.schur_lld  = size;
2498   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2499   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2500 
2501   /* MUMPS expects Fortran style indices */
2502   PetscCall(PetscFree(mumps->id.listvar_schur));
2503   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2504   PetscCall(ISGetIndices(is, &idxs));
2505   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2506   PetscCall(ISRestoreIndices(is, &idxs));
2507   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2508   mumps->id.ICNTL(26) = -1;
2509   PetscFunctionReturn(PETSC_SUCCESS);
2510 }
2511 
2512 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2513 {
2514   Mat          St;
2515   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2516   PetscScalar *array;
2517 #if defined(PETSC_USE_COMPLEX)
2518   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2519 #endif
2520 
2521   PetscFunctionBegin;
2522   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2523   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2524   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2525   PetscCall(MatSetType(St, MATDENSE));
2526   PetscCall(MatSetUp(St));
2527   PetscCall(MatDenseGetArray(St, &array));
2528   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2529     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2530       PetscInt i, j, N = mumps->id.size_schur;
2531       for (i = 0; i < N; i++) {
2532         for (j = 0; j < N; j++) {
2533 #if !defined(PETSC_USE_COMPLEX)
2534           PetscScalar val = mumps->id.schur[i * N + j];
2535 #else
2536           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2537 #endif
2538           array[j * N + i] = val;
2539         }
2540       }
2541     } else { /* stored by columns */
2542       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2543     }
2544   } else {                          /* either full or lower-triangular (not packed) */
2545     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2546       PetscInt i, j, N = mumps->id.size_schur;
2547       for (i = 0; i < N; i++) {
2548         for (j = i; j < N; j++) {
2549 #if !defined(PETSC_USE_COMPLEX)
2550           PetscScalar val = mumps->id.schur[i * N + j];
2551 #else
2552           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2553 #endif
2554           array[i * N + j] = val;
2555           array[j * N + i] = val;
2556         }
2557       }
2558     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2559       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2560     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2561       PetscInt i, j, N = mumps->id.size_schur;
2562       for (i = 0; i < N; i++) {
2563         for (j = 0; j < i + 1; j++) {
2564 #if !defined(PETSC_USE_COMPLEX)
2565           PetscScalar val = mumps->id.schur[i * N + j];
2566 #else
2567           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2568 #endif
2569           array[i * N + j] = val;
2570           array[j * N + i] = val;
2571         }
2572       }
2573     }
2574   }
2575   PetscCall(MatDenseRestoreArray(St, &array));
2576   *S = St;
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2581 {
2582   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2583 
2584   PetscFunctionBegin;
2585   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2586     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2587     for (i = 0; i < nICNTL_pre; ++i)
2588       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2589     if (i == nICNTL_pre) {                             /* not already cached */
2590       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2591       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2592       mumps->ICNTL_pre[0]++;
2593     }
2594     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2595     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2596   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2597   PetscFunctionReturn(PETSC_SUCCESS);
2598 }
2599 
2600 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2601 {
2602   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2603 
2604   PetscFunctionBegin;
2605   if (mumps->id.job == JOB_NULL) {
2606     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2607     *ival = 0;
2608     for (i = 0; i < nICNTL_pre; ++i) {
2609       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2610     }
2611   } else *ival = mumps->id.ICNTL(icntl);
2612   PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614 
2615 /*@
2616   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2617 
2618    Logically Collective
2619 
2620    Input Parameters:
2621 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2622 .  icntl - index of MUMPS parameter array ICNTL()
2623 -  ival - value of MUMPS ICNTL(icntl)
2624 
2625   Options Database Key:
2626 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2627 
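  Example Usage:
  A minimal sketch (assuming `A` is an assembled `Mat` of a supported type; the ICNTL index and value are only illustrative):
.vb
      Mat F;
      PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
      PetscCall(MatMumpsSetIcntl(F, 14, 30)); /* raise the estimated working space increase, ICNTL(14), to 30 percent */
.ve
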
2628    Level: beginner
2629 
2630    References:
2631 .  * - MUMPS Users' Guide
2632 
2633 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2634 @*/
2635 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2636 {
2637   PetscFunctionBegin;
2638   PetscValidType(F, 1);
2639   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2640   PetscValidLogicalCollectiveInt(F, icntl, 2);
2641   PetscValidLogicalCollectiveInt(F, ival, 3);
2642   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2643   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2644   PetscFunctionReturn(PETSC_SUCCESS);
2645 }
2646 
2647 /*@
2648   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2649 
2650    Logically Collective
2651 
2652    Input Parameters:
2653 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2654 -  icntl - index of MUMPS parameter array ICNTL()
2655 
2656   Output Parameter:
2657 .  ival - value of MUMPS ICNTL(icntl)
2658 
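  Example Usage:
  A minimal sketch (assuming `F` is a factor matrix obtained with `MatGetFactor()` and `MATSOLVERMUMPS`):
.vb
      PetscInt ival;
      PetscCall(MatMumpsGetIcntl(F, 24, &ival)); /* query ICNTL(24), detection of null pivot rows */
.ve
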
2659    Level: beginner
2660 
2661    References:
2662 .  * - MUMPS Users' Guide
2663 
2664 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2665 @*/
2666 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2667 {
2668   PetscFunctionBegin;
2669   PetscValidType(F, 1);
2670   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2671   PetscValidLogicalCollectiveInt(F, icntl, 2);
2672   PetscValidIntPointer(ival, 3);
2673   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2674   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2675   PetscFunctionReturn(PETSC_SUCCESS);
2676 }
2677 
2678 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2679 {
2680   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2681 
2682   PetscFunctionBegin;
2683   if (mumps->id.job == JOB_NULL) {
2684     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2685     for (i = 0; i < nCNTL_pre; ++i)
2686       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2687     if (i == nCNTL_pre) {
2688       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2689       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2690       mumps->CNTL_pre[0]++;
2691     }
2692     mumps->CNTL_pre[1 + 2 * i] = icntl;
2693     mumps->CNTL_pre[2 + 2 * i] = val;
2694   } else mumps->id.CNTL(icntl) = val;
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2699 {
2700   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2701 
2702   PetscFunctionBegin;
2703   if (mumps->id.job == JOB_NULL) {
2704     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2705     *val = 0.0;
2706     for (i = 0; i < nCNTL_pre; ++i) {
2707       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2708     }
2709   } else *val = mumps->id.CNTL(icntl);
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 /*@
2714   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2715 
2716    Logically Collective
2717 
2718    Input Parameters:
2719 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2720 .  icntl - index of MUMPS parameter array CNTL()
2721 -  val - value of MUMPS CNTL(icntl)
2722 
2723   Options Database Key:
2724 .   -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
2725 
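  Example Usage:
  A minimal sketch (assuming `F` is a factor matrix obtained with `MatGetFactor()` and `MATSOLVERMUMPS`; the value is only illustrative):
.vb
      PetscCall(MatMumpsSetCntl(F, 1, 0.01)); /* set the relative pivoting threshold, CNTL(1) */
.ve
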
2726    Level: beginner
2727 
2728    References:
2729 .  * - MUMPS Users' Guide
2730 
2731 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2732 @*/
2733 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2734 {
2735   PetscFunctionBegin;
2736   PetscValidType(F, 1);
2737   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2738   PetscValidLogicalCollectiveInt(F, icntl, 2);
2739   PetscValidLogicalCollectiveReal(F, val, 3);
2740   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2741   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2742   PetscFunctionReturn(PETSC_SUCCESS);
2743 }
2744 
2745 /*@
2746   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2747 
2748    Logically Collective
2749 
2750    Input Parameters:
2751 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2752 -  icntl - index of MUMPS parameter array CNTL()
2753 
2754   Output Parameter:
2755 .  val - value of MUMPS CNTL(icntl)
2756 
2757    Level: beginner
2758 
2759    References:
2760 .  * - MUMPS Users' Guide
2761 
2762 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2763 @*/
2764 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2765 {
2766   PetscFunctionBegin;
2767   PetscValidType(F, 1);
2768   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2769   PetscValidLogicalCollectiveInt(F, icntl, 2);
2770   PetscValidRealPointer(val, 3);
2771   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2772   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2773   PetscFunctionReturn(PETSC_SUCCESS);
2774 }
2775 
2776 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2777 {
2778   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2779 
2780   PetscFunctionBegin;
2781   *info = mumps->id.INFO(icntl);
2782   PetscFunctionReturn(PETSC_SUCCESS);
2783 }
2784 
2785 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2786 {
2787   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2788 
2789   PetscFunctionBegin;
2790   *infog = mumps->id.INFOG(icntl);
2791   PetscFunctionReturn(PETSC_SUCCESS);
2792 }
2793 
2794 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2795 {
2796   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2797 
2798   PetscFunctionBegin;
2799   *rinfo = mumps->id.RINFO(icntl);
2800   PetscFunctionReturn(PETSC_SUCCESS);
2801 }
2802 
2803 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2804 {
2805   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2806 
2807   PetscFunctionBegin;
2808   *rinfog = mumps->id.RINFOG(icntl);
2809   PetscFunctionReturn(PETSC_SUCCESS);
2810 }
2811 
2812 PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2813 {
2814   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2815 
2816   PetscFunctionBegin;
2817   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2818   *size  = 0;
2819   *array = NULL;
2820   if (!mumps->myid) {
2821     *size = mumps->id.INFOG(28);
2822     PetscCall(PetscMalloc1(*size, array));
2823     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2824   }
2825   PetscFunctionReturn(PETSC_SUCCESS);
2826 }
2827 
2828 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2829 {
2830   Mat          Bt = NULL, Btseq = NULL;
2831   PetscBool    flg;
2832   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2833   PetscScalar *aa;
2834   PetscInt     spnr, *ia, *ja, M, nrhs;
2835 
2836   PetscFunctionBegin;
2837   PetscValidPointer(spRHS, 2);
2838   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2839   if (flg) {
2840     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2841   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2842 
2843   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2844 
2845   if (mumps->petsc_size > 1) {
2846     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2847     Btseq         = b->A;
2848   } else {
2849     Btseq = Bt;
2850   }
2851 
2852   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2853   mumps->id.nrhs = nrhs;
2854   mumps->id.lrhs = M;
2855   mumps->id.rhs  = NULL;
2856 
2857   if (!mumps->myid) {
2858     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2859     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2860     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2861     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2862     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2863   } else {
2864     mumps->id.irhs_ptr    = NULL;
2865     mumps->id.irhs_sparse = NULL;
2866     mumps->id.nz_rhs      = 0;
2867     mumps->id.rhs_sparse  = NULL;
2868   }
2869   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2870   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2871 
2872   /* solve phase */
2873   mumps->id.job = JOB_SOLVE;
2874   PetscMUMPS_c(mumps);
2875   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2876 
2877   if (!mumps->myid) {
2878     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2879     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2880     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2881   }
2882   PetscFunctionReturn(PETSC_SUCCESS);
2883 }
2884 
2885 /*@
2886   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2887 
2888    Logically Collective
2889 
2890    Input Parameter:
2891 .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2892 
2893   Output Parameter:
2894 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2895 
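  Example Usage:
  A minimal sketch (assuming `F` is the MUMPS factor of `A`, and `Bt` is a hypothetical sequential `MATAIJ` matrix whose nonzero pattern selects the requested entries of the inverse):
.vb
      Mat spRHS;
      PetscCall(MatCreateTranspose(Bt, &spRHS)); /* wrap Bt as a MATTRANSPOSEVIRTUAL matrix */
      PetscCall(MatMumpsGetInverse(F, spRHS));   /* the requested entries of inv(A) are written into the values of Bt */
      PetscCall(MatDestroy(&spRHS));
.ve
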
2896    Level: beginner
2897 
2898    References:
2899 .  * - MUMPS Users' Guide
2900 
2901 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2902 @*/
2903 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2904 {
2905   PetscFunctionBegin;
2906   PetscValidType(F, 1);
2907   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2908   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2909   PetscFunctionReturn(PETSC_SUCCESS);
2910 }
2911 
2912 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2913 {
2914   Mat spRHS;
2915 
2916   PetscFunctionBegin;
2917   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2918   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2919   PetscCall(MatDestroy(&spRHS));
2920   PetscFunctionReturn(PETSC_SUCCESS);
2921 }
2922 
2923 /*@
2924   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
2925 
2926    Logically Collective
2927 
2928    Input Parameter:
2929 .  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2930 
2931   Output Parameter:
2932 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
2933 
2934    Level: beginner
2935 
2936    References:
2937 .  * - MUMPS Users' Guide
2938 
2939 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2940 @*/
2941 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2942 {
2943   PetscBool flg;
2944 
2945   PetscFunctionBegin;
2946   PetscValidType(F, 1);
2947   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2948   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2949   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2950 
2951   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 /*@
2956   MatMumpsGetInfo - Get MUMPS parameter INFO()
2957 
2958    Logically Collective
2959 
2960    Input Parameters:
2961 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2962 -  icntl - index of MUMPS parameter array INFO()
2963 
2964   Output Parameter:
2965 .  ival - value of MUMPS INFO(icntl)
2966 
2967    Level: beginner
2968 
2969    References:
2970 .  * - MUMPS Users' Guide
2971 
2972 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2973 @*/
2974 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2975 {
2976   PetscFunctionBegin;
2977   PetscValidType(F, 1);
2978   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2979   PetscValidIntPointer(ival, 3);
2980   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2981   PetscFunctionReturn(PETSC_SUCCESS);
2982 }
2983 
2984 /*@
2985   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2986 
2987    Logically Collective
2988 
2989    Input Parameters:
2990 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2991 -  icntl - index of MUMPS parameter array INFOG()
2992 
2993   Output Parameter:
2994 .  ival - value of MUMPS INFOG(icntl)
2995 
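  Example Usage:
  A minimal sketch (assuming `F` has already gone through a MUMPS factorization phase):
.vb
      PetscInt infog1;
      PetscCall(MatMumpsGetInfog(F, 1, &infog1)); /* INFOG(1) < 0 indicates an error detected by MUMPS */
.ve
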
2996    Level: beginner
2997 
2998    References:
2999 .  * - MUMPS Users' Guide
3000 
3001 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3002 @*/
3003 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3004 {
3005   PetscFunctionBegin;
3006   PetscValidType(F, 1);
3007   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3008   PetscValidIntPointer(ival, 3);
3009   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3010   PetscFunctionReturn(PETSC_SUCCESS);
3011 }
3012 
3013 /*@
3014   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
3015 
3016    Logically Collective
3017 
3018    Input Parameters:
3019 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3020 -  icntl - index of MUMPS parameter array RINFO()
3021 
3022   Output Parameter:
3023 .  val - value of MUMPS RINFO(icntl)
3024 
3025    Level: beginner
3026 
3027    References:
3028 .  * - MUMPS Users' Guide
3029 
3030 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3031 @*/
3032 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3033 {
3034   PetscFunctionBegin;
3035   PetscValidType(F, 1);
3036   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3037   PetscValidRealPointer(val, 3);
3038   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3039   PetscFunctionReturn(PETSC_SUCCESS);
3040 }
3041 
3042 /*@
3043   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
3044 
3045    Logically Collective
3046 
3047    Input Parameters:
3048 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3049 -  icntl - index of MUMPS parameter array RINFOG()
3050 
3051   Output Parameter:
3052 .  val - value of MUMPS RINFOG(icntl)
3053 
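  Example Usage:
  A minimal sketch (assuming the numerical factorization on `F` has completed):
.vb
      PetscReal flops;
      PetscCall(MatMumpsGetRinfog(F, 3, &flops)); /* RINFOG(3): global estimated flops for the elimination after factorization */
.ve
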
3054    Level: beginner
3055 
3056    References:
3057 .  * - MUMPS Users' Guide
3058 
3059 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3060 @*/
3061 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3062 {
3063   PetscFunctionBegin;
3064   PetscValidType(F, 1);
3065   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3066   PetscValidRealPointer(val, 3);
3067   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3068   PetscFunctionReturn(PETSC_SUCCESS);
3069 }
3070 
3071 /*@
3072   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
3073 
3074    Logically Collective
3075 
3076    Input Parameter:
3077 .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3078 
3079   Output Parameters:
3080 +  size - local size of the array. The size of the array is non-zero only on the host.
3081 -  array - array of rows with null pivots; these rows use 0-based indexing. The array is allocated within the function and the user is responsible
3082            for freeing it.
3083 
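  Example Usage:
  A minimal sketch (assuming `F` was factorized with null pivot detection enabled, e.g. -mat_mumps_icntl_24 1):
.vb
      PetscInt  nnull, *rows;
      PetscCall(MatMumpsGetNullPivots(F, &nnull, &rows)); /* nnull and rows are nonempty only on the host rank */
      /* ... inspect the 0-based row indices in rows ... */
      PetscCall(PetscFree(rows));
.ve
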
3084    Level: beginner
3085 
3086    References:
3087 .  * - MUMPS Users' Guide
3088 
3089 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3090 @*/
3091 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3092 {
3093   PetscFunctionBegin;
3094   PetscValidType(F, 1);
3095   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3096   PetscValidIntPointer(size, 3);
3097   PetscValidPointer(array, 4);
3098   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3099   PetscFunctionReturn(PETSC_SUCCESS);
3100 }
3101 
3102 /*MC
3103   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3104   distributed and sequential matrices via the external package MUMPS.
3105 
3106   Works with `MATAIJ` and `MATSBAIJ` matrices
3107 
3108   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3109 
3110   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the rest (non-MUMPS part) of your code runs in flat-MPI mode.
3111   See details below.
3112 
3113   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
3114 
3115   Options Database Keys:
3116 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3117 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
3118 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
3119 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
3120 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
3121 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3122                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3123 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3124 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3125 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3126 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3127 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3128 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3129 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3130 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3131 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3132 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3133 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
3134 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3135 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3136 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
3137 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3138 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3139 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3140 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3141 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3142 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3143 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3144 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3145 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
3146 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
3147 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3148 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3149 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3150 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3151 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
3152                                    If m is not provided, the default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
3153 
3154   Level: beginner
3155 
3156     Notes:
3157     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3158     error if the matrix is Hermitian.
3159 
3160     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3161     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3162 
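    For example, a minimal command-line sketch (assuming ./ex1 is your own PETSc program that solves a linear system with `KSP`; the ICNTL value is only illustrative):
.vb
      mpirun -n 4 ./ex1 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30
.ve
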
3163     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3164     the failure with
3165 .vb
3166           KSPGetPC(ksp,&pc);
3167           PCFactorGetMatrix(pc,&mat);
3168           MatMumpsGetInfo(mat,....);
3169           MatMumpsGetInfog(mat,....); etc.
3170 .ve
3171     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3172 
3173     MUMPS provides 64-bit integer support in two build modes:
3174       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3175       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3176 
3177       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3178       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3179       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3180       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3181 
3182     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3183 
3184   Two modes to run MUMPS/PETSc with OpenMP
3185 .vb
3186      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3187      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3188 .ve
3189 
3190 .vb
3191      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3192     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3193 .ve
3194 
3195    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3196    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3197    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3198    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3199    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
3200 
3201    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3202    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3203    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3204    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We will free CPUs assigned to slaves (might be set
3205    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3206    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3207    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3208    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3209    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3210    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3211    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3212    examine the mapping result.
3213 
3214    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3215    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3216    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3217 
3218    References:
3219 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3220 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3221 
3222 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3223 M*/
3224 
3225 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3226 {
3227   PetscFunctionBegin;
3228   *type = MATSOLVERMUMPS;
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 /* MatGetFactor for Seq and MPI AIJ matrices */
3233 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3234 {
3235   Mat         B;
3236   Mat_MUMPS  *mumps;
3237   PetscBool   isSeqAIJ;
3238   PetscMPIInt size;
3239 
3240   PetscFunctionBegin;
3241 #if defined(PETSC_USE_COMPLEX)
3242   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3243 #endif
3244   /* Create the factorization matrix */
3245   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3246   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3247   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3248   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3249   PetscCall(MatSetUp(B));
3250 
3251   PetscCall(PetscNew(&mumps));
3252 
3253   B->ops->view    = MatView_MUMPS;
3254   B->ops->getinfo = MatGetInfo_MUMPS;
3255 
3256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3257   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3258   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3261   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3262   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3263   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3264   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3265   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3266   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3267   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3268   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3269   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3270 
3271   if (ftype == MAT_FACTOR_LU) {
3272     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3273     B->factortype            = MAT_FACTOR_LU;
3274     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3275     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3276     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3277     mumps->sym = 0;
3278   } else {
3279     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3280     B->factortype                  = MAT_FACTOR_CHOLESKY;
3281     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3282     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3283     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3284 #if defined(PETSC_USE_COMPLEX)
3285     mumps->sym = 2;
3286 #else
3287     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3288     else mumps->sym = 2;
3289 #endif
3290   }
3291 
3292   /* set solvertype */
3293   PetscCall(PetscFree(B->solvertype));
3294   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3295   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3296   if (size == 1) {
3297     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3298     B->canuseordering = PETSC_TRUE;
3299   }
3300   B->ops->destroy = MatDestroy_MUMPS;
3301   B->data         = (void *)mumps;
3302 
3303   *F               = B;
3304   mumps->id.job    = JOB_NULL;
3305   mumps->ICNTL_pre = NULL;
3306   mumps->CNTL_pre  = NULL;
3307   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3308   PetscFunctionReturn(PETSC_SUCCESS);
3309 }
3310 
3311 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3312 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3313 {
3314   Mat         B;
3315   Mat_MUMPS  *mumps;
3316   PetscBool   isSeqSBAIJ;
3317   PetscMPIInt size;
3318 
3319   PetscFunctionBegin;
3320 #if defined(PETSC_USE_COMPLEX)
3321   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3322 #endif
3323   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3324   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3325   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3326   PetscCall(MatSetUp(B));
3327 
3328   PetscCall(PetscNew(&mumps));
3329   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3330   if (isSeqSBAIJ) {
3331     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3332   } else {
3333     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3334   }
3335 
3336   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3337   B->ops->view                   = MatView_MUMPS;
3338   B->ops->getinfo                = MatGetInfo_MUMPS;
3339 
3340   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3341   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3342   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3343   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3344   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3345   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3346   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3347   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3348   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3349   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3350   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3351   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3352   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3353   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3354 
3355   B->factortype = MAT_FACTOR_CHOLESKY;
3356 #if defined(PETSC_USE_COMPLEX)
3357   mumps->sym = 2;
3358 #else
3359   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3360   else mumps->sym = 2;
3361 #endif
3362 
3363   /* set solvertype */
3364   PetscCall(PetscFree(B->solvertype));
3365   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3366   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3367   if (size == 1) {
3368     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3369     B->canuseordering = PETSC_TRUE;
3370   }
3371   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3372   B->ops->destroy = MatDestroy_MUMPS;
3373   B->data         = (void *)mumps;
3374 
3375   *F               = B;
3376   mumps->id.job    = JOB_NULL;
3377   mumps->ICNTL_pre = NULL;
3378   mumps->CNTL_pre  = NULL;
3379   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3380   PetscFunctionReturn(PETSC_SUCCESS);
3381 }
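/*
   Editor's sketch (A is an assumed, already assembled SBAIJ matrix): marking the matrix as
   SPD before requesting the factor is what makes the code above select MUMPS SYM = 1 in
   real builds; otherwise the general symmetric path SYM = 2 is used.

     Mat F;

     PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));                      // real case: selects mumps->sym = 1
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));  // dispatches to MatGetFactor_sbaij_mumps()
     // MatCholeskyFactorSymbolic(), MatCholeskyFactorNumeric() and MatSolve() then proceed as in the AIJ sketch above
*/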
3382 
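/* MatGetFactor for Seq and MPI BAIJ matrices */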
3383 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3384 {
3385   Mat         B;
3386   Mat_MUMPS  *mumps;
3387   PetscBool   isSeqBAIJ;
3388   PetscMPIInt size;
3389 
3390   PetscFunctionBegin;
3391   /* Create the factorization matrix */
3392   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3393   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3394   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3395   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3396   PetscCall(MatSetUp(B));
3397 
3398   PetscCall(PetscNew(&mumps));
3399   if (ftype == MAT_FACTOR_LU) {
3400     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3401     B->factortype            = MAT_FACTOR_LU;
3402     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3403     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3404     mumps->sym = 0;
3405     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3406   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3407 
3408   B->ops->view    = MatView_MUMPS;
3409   B->ops->getinfo = MatGetInfo_MUMPS;
3410 
3411   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3412   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3413   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3414   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3415   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3416   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3417   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3418   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3419   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3420   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3421   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3422   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3423   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3424   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3425 
3426   /* set solvertype */
3427   PetscCall(PetscFree(B->solvertype));
3428   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3429   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3430   if (size == 1) {
3431     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3432     B->canuseordering = PETSC_TRUE;
3433   }
3434   B->ops->destroy = MatDestroy_MUMPS;
3435   B->data         = (void *)mumps;
3436 
3437   *F               = B;
3438   mumps->id.job    = JOB_NULL;
3439   mumps->ICNTL_pre = NULL;
3440   mumps->CNTL_pre  = NULL;
3441   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3442   PetscFunctionReturn(PETSC_SUCCESS);
3443 }
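/*
   Editor's sketch (A is an assumed, already assembled BAIJ matrix): BAIJ matrices only reach
   MUMPS through the LU branch above, and a Cholesky request errors out.  One possible
   workaround is to convert to AIJ (or SBAIJ) first and request the factor from the converted
   matrix.

     Mat Aaij, F;

     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &Aaij));
     PetscCall(MatGetFactor(Aaij, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F)); // now dispatches to MatGetFactor_aij_mumps()
     // factor and solve as in the AIJ sketch above, then destroy F and Aaij
*/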
3444 
3445 /* MatGetFactor for Seq and MPI SELL matrices */
3446 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3447 {
3448   Mat         B;
3449   Mat_MUMPS  *mumps;
3450   PetscBool   isSeqSELL;
3451   PetscMPIInt size;
3452 
3453   PetscFunctionBegin;
3454   /* Create the factorization matrix */
3455   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3456   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3457   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3458   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3459   PetscCall(MatSetUp(B));
3460 
3461   PetscCall(PetscNew(&mumps));
3462 
3463   B->ops->view    = MatView_MUMPS;
3464   B->ops->getinfo = MatGetInfo_MUMPS;
3465 
3466   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3467   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3468   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3469   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3470   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3471   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3472   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3473   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3474   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3475   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3476   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3477   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3478 
3479   if (ftype == MAT_FACTOR_LU) {
3480     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3481     B->factortype            = MAT_FACTOR_LU;
3482     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3483     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3484     mumps->sym = 0;
3485     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3486   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3487 
3488   /* set solvertype */
3489   PetscCall(PetscFree(B->solvertype));
3490   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3491   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3492   if (size == 1) {
3493     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3493     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3494     B->canuseordering = PETSC_TRUE;
3495   }
3496   B->ops->destroy = MatDestroy_MUMPS;
3497   B->data         = (void *)mumps;
3498 
3499   *F               = B;
3500   mumps->id.job    = JOB_NULL;
3501   mumps->ICNTL_pre = NULL;
3502   mumps->CNTL_pre  = NULL;
3503   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3504   PetscFunctionReturn(PETSC_SUCCESS);
3505 }
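/*
   Editor's sketch of the control interface composed on every MUMPS factor above.  F is an
   assumed factor obtained from MatGetFactor(); the parameter values are illustrative only.

     PetscInt infog1;

     PetscCall(MatMumpsSetIcntl(F, 14, 30));     // ICNTL(14): percentage increase of the estimated workspace
     PetscCall(MatMumpsSetCntl(F, 1, 0.01));     // CNTL(1): relative pivoting threshold
     // ... symbolic and numeric factorization ...
     PetscCall(MatMumpsGetInfog(F, 1, &infog1)); // INFOG(1) < 0 indicates a MUMPS error
*/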
3506 
3507 /* MatGetFactor for MATNEST matrices */
3508 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3509 {
3510   Mat         B, **mats;
3511   Mat_MUMPS  *mumps;
3512   PetscInt    nr, nc;
3513   PetscMPIInt size;
3514 
3515   PetscFunctionBegin;
3516 #if defined(PETSC_USE_COMPLEX)
3517   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3518 #endif
3519 
3520   /* Error out if the nest does not satisfy the conditions required for the requested MUMPS factorization */
3521   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3522   if (ftype == MAT_FACTOR_CHOLESKY) {
3523     IS       *rows, *cols;
3524     PetscBool flg = PETSC_TRUE;
3525     PetscInt *m, *M;
3526 
3527     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3528     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3529     PetscCall(MatNestGetISs(A, rows, cols));
3530     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3531     if (!flg) PetscCall(PetscFree2(rows, cols));
3532     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.");
3533     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3534     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3535     for (PetscInt r = 0; flg && r < nr; r++)
3536       for (PetscInt k = r + 1; flg && k < nr; k++)
3537         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3538     PetscCall(PetscFree2(m, M));
3539     PetscCall(PetscFree2(rows, cols));
3540     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.");
3541   }
3542 
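  /* Check that each block needed by the requested factorization has a supported matrix type */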
3543   for (PetscInt r = 0; r < nr; r++) {
3544     for (PetscInt c = 0; c < nc; c++) {
3545       Mat       sub = mats[r][c];
3546       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ;
3547 
3548       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3549       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3550       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3551       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3552       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3553       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3554       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3555       if (ftype == MAT_FACTOR_CHOLESKY) {
3556         if (r == c) PetscCheck(isSeqAIJ || isMPIAIJ || isSeqSBAIJ || isMPISBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for diagonal block of type %s", ((PetscObject)sub)->type_name);
3557         else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for offdiagonal block of type %s", ((PetscObject)sub)->type_name);
3558       } else PetscCheck(isSeqAIJ || isMPIAIJ || isSeqBAIJ || isMPIBAIJ, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
3559     }
3560   }
3561 
3562   /* Create the factorization matrix */
3563   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3564   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3565   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3566   PetscCall(MatSetUp(B));
3567 
3568   PetscCall(PetscNew(&mumps));
3569 
3570   B->ops->view    = MatView_MUMPS;
3571   B->ops->getinfo = MatGetInfo_MUMPS;
3572 
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3576   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3578   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3579   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3580   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3581   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3582   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3583   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3584   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3585   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3586   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3587 
3588   if (ftype == MAT_FACTOR_LU) {
3589     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3590     B->factortype            = MAT_FACTOR_LU;
3591     mumps->sym               = 0;
3592   } else {
3593     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3594     B->factortype                  = MAT_FACTOR_CHOLESKY;
3595 #if defined(PETSC_USE_COMPLEX)
3596     mumps->sym = 2;
3597 #else
3598     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3599     else mumps->sym = 2;
3600 #endif
3601   }
3602   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3603   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3604 
3605   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3606   if (size == 1) {
3607     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3608     B->canuseordering = PETSC_TRUE;
3609   }
3610 
3611   /* set solvertype */
3612   PetscCall(PetscFree(B->solvertype));
3613   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3614   B->ops->destroy = MatDestroy_MUMPS;
3615   B->data         = (void *)mumps;
3616 
3617   *F               = B;
3618   mumps->id.job    = JOB_NULL;
3619   mumps->ICNTL_pre = NULL;
3620   mumps->CNTL_pre  = NULL;
3621   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3622   PetscFunctionReturn(PETSC_SUCCESS);
3623 }
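/*
   Editor's sketch (A00, A01, A10, A11 are assumed submatrices of supported AIJ/SBAIJ/BAIJ
   types, with the diagonal blocks symmetric for the Cholesky case): MAT_FACTOR_CHOLESKY on
   a MATNEST requires a square nest whose row and column index sets coincide block by block
   and do not overlap, which is what the checks above enforce.  For a layout-symmetric nest,
   the contiguous index sets created by passing NULL ISs satisfy those checks.

     Mat nest, F;
     Mat blocks[4] = {A00, A01, A10, A11};

     PetscCall(MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &nest));
     PetscCall(MatGetFactor(nest, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F));
     // factor and solve as usual; MatConvertToTriples_nest_xaij assembles the MUMPS triples from the blocks
*/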
3624 
3625 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3626 {
3627   PetscFunctionBegin;
3628   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3629   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3630   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3631   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3632   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3633   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3634   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3635   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3636   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3637   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3638   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3639   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3640   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3641   PetscFunctionReturn(PETSC_SUCCESS);
3642 }
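/*
   Editor's sketch of how the registrations above are typically consumed (ksp, b, x, and the
   system matrix are assumed to come from the application): either via the command line,

     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30

   or programmatically:

     PC  pc;
     Mat F;

     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                           // PCCHOLESKY for the Cholesky registrations
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(PCFactorSetUpMatSolverType(pc));                // create the factor matrix so it can be accessed
     PetscCall(PCFactorGetMatrix(pc, &F));
     PetscCall(MatMumpsSetIcntl(F, 14, 30));                   // illustrative MUMPS option
     PetscCall(KSPSolve(ksp, b, x));
*/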
3643