xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 43cdf1ebbcbfa05bee08e48007ef1bae3f20f4e9)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12   #if defined(PETSC_USE_REAL_SINGLE)
13     #include <cmumps_c.h>
14   #else
15     #include <zmumps_c.h>
16   #endif
17 #else
18   #if defined(PETSC_USE_REAL_SINGLE)
19     #include <smumps_c.h>
20   #else
21     #include <dmumps_c.h>
22   #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34   #if defined(PETSC_USE_REAL_SINGLE)
35     #define MUMPS_c cmumps_c
36   #else
37     #define MUMPS_c zmumps_c
38   #endif
39 #else
40   #if defined(PETSC_USE_REAL_SINGLE)
41     #define MUMPS_c smumps_c
42   #else
43     #define MUMPS_c dmumps_c
44   #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
48    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention in PetscMPIInt, PetscBLASInt etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
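/* A sketch (illustrative assignments) of how the two integer types appear when handing a
   distributed matrix to MUMPS; irn_loc/jcn_loc/nnz_loc are the MUMPS C-interface fields
   mentioned above:

     mumps->id.nnz_loc = mumps->nnz;   // number of local nonzeros, 64-bit
     mumps->id.irn_loc = mumps->irn;   // local row indices, PetscMUMPSInt (MUMPS_INT)
     mumps->id.jcn_loc = mumps->jcn;   // local column indices, PetscMUMPSInt (MUMPS_INT)
*/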
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, hence checking this macro is more reliable */
55     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(0);
76 }
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80 {
81   PetscInt  myval;
82   PetscBool myset;
83   PetscFunctionBegin;
84   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87   if (set) *set = myset;
88   PetscFunctionReturn(0);
89 }
90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
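/* Illustrative use inside an options-processing block (the option name and help strings below are
   examples only; PetscOptionsObject must be in scope, as the macro assumes):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
     if (flg) mumps->id.ICNTL(4) = icntl;
*/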
91 
92 /* if using PETSc OpenMP support, we call MUMPS only on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94   #define PetscMUMPS_c(mumps) \
95     do { \
96       if (mumps->use_petsc_omp_support) { \
97         if (mumps->is_omp_master) { \
98           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99           MUMPS_c(&mumps->id); \
100           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
101         } \
102         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
103         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific    \
104          to each process, so we only Bcast info[1] (an error code) and leave the others (since they do not     \
105          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.         \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.     \
107       */ \
108         PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
109         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
110         PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
111       } else { \
112         MUMPS_c(&mumps->id); \
113       } \
114     } while (0)
115 #else
116   #define PetscMUMPS_c(mumps) \
117     do { \
118       MUMPS_c(&mumps->id); \
119     } while (0)
120 #endif
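/* Every MUMPS phase in this file goes through the wrapper above with the same pattern: select the
   job, run it, then check the global error flag, e.g.

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
*/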
121 
122 /* declare MumpsScalar */
123 #if defined(PETSC_USE_COMPLEX)
124   #if defined(PETSC_USE_REAL_SINGLE)
125     #define MumpsScalar mumps_complex
126   #else
127     #define MumpsScalar mumps_double_complex
128   #endif
129 #else
130   #define MumpsScalar PetscScalar
131 #endif
132 
133 /* macros s.t. indices match MUMPS documentation */
134 #define ICNTL(I)  icntl[(I)-1]
135 #define CNTL(I)   cntl[(I)-1]
136 #define INFOG(I)  infog[(I)-1]
137 #define INFO(I)   info[(I)-1]
138 #define RINFOG(I) rinfog[(I)-1]
139 #define RINFO(I)  rinfo[(I)-1]
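/* For example, mumps->id.ICNTL(24) expands to mumps->id.icntl[23], so the code below can use the
   same 1-based numbering as the MUMPS User's Guide. */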
140 
141 typedef struct Mat_MUMPS Mat_MUMPS;
142 struct Mat_MUMPS {
143 #if defined(PETSC_USE_COMPLEX)
144   #if defined(PETSC_USE_REAL_SINGLE)
145   CMUMPS_STRUC_C id;
146   #else
147   ZMUMPS_STRUC_C id;
148   #endif
149 #else
150   #if defined(PETSC_USE_REAL_SINGLE)
151   SMUMPS_STRUC_C id;
152   #else
153   DMUMPS_STRUC_C id;
154   #endif
155 #endif
156 
157   MatStructure   matstruc;
158   PetscMPIInt    myid, petsc_size;
159   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
160   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
161   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
162   PetscMUMPSInt  sym;
163   MPI_Comm       mumps_comm;
164   PetscMUMPSInt *ICNTL_pre;
165   PetscReal     *CNTL_pre;
166   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
167   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
168   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
169   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
170 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
171   PetscInt    *rhs_nrow, max_nrhs;
172   PetscMPIInt *rhs_recvcounts, *rhs_disps;
173   PetscScalar *rhs_loc, *rhs_recvbuf;
174 #endif
175   Vec            b_seq, x_seq;
176   PetscInt       ninfo, *info; /* which INFO to display */
177   PetscInt       sizeredrhs;
178   PetscScalar   *schur_sol;
179   PetscInt       schur_sizesol;
180   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
181   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
182   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
183 
184   /* stuff used by the PETSc/MUMPS OpenMP support */
185   PetscBool    use_petsc_omp_support;
186   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
187   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
188   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
189   PetscMPIInt  tag, omp_comm_size;
190   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
191   MPI_Request *reqs;
192 };
193 
194 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
195    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
196  */
197 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
198 {
199   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
200 
201   PetscFunctionBegin;
202 #if defined(PETSC_USE_64BIT_INDICES)
203   {
204     PetscInt i;
205     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
206       PetscCall(PetscFree(mumps->ia_alloc));
207       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
208       mumps->cur_ilen = nrow + 1;
209     }
210     if (nnz > mumps->cur_jlen) {
211       PetscCall(PetscFree(mumps->ja_alloc));
212       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
213       mumps->cur_jlen = nnz;
214     }
215     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
216     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
217     *ia_mumps = mumps->ia_alloc;
218     *ja_mumps = mumps->ja_alloc;
219   }
220 #else
221   *ia_mumps          = ia;
222   *ja_mumps          = ja;
223 #endif
224   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
225   PetscFunctionReturn(0);
226 }
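/* This is how the cast is used below to pass a sparse right-hand side to MUMPS (see MatMatSolve_MUMPS()):

     PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
*/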
227 
228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
229 {
230   PetscFunctionBegin;
231   PetscCall(PetscFree(mumps->id.listvar_schur));
232   PetscCall(PetscFree(mumps->id.redrhs));
233   PetscCall(PetscFree(mumps->schur_sol));
234   mumps->id.size_schur = 0;
235   mumps->id.schur_lld  = 0;
236   mumps->id.ICNTL(19)  = 0;
237   PetscFunctionReturn(0);
238 }
239 
240 /* solve with rhs in mumps->id.redrhs and return in the same location */
241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
242 {
243   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
244   Mat                  S, B, X;
245   MatFactorSchurStatus schurstatus;
246   PetscInt             sizesol;
247 
248   PetscFunctionBegin;
249   PetscCall(MatFactorFactorizeSchurComplement(F));
250   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
251   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
252   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
254   PetscCall(MatBindToCPU(B, S->boundtocpu));
255 #endif
256   switch (schurstatus) {
257   case MAT_FACTOR_SCHUR_FACTORED:
258     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
259     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261     PetscCall(MatBindToCPU(X, S->boundtocpu));
262 #endif
263     if (!mumps->id.ICNTL(9)) { /* transpose solve */
264       PetscCall(MatMatSolveTranspose(S, B, X));
265     } else {
266       PetscCall(MatMatSolve(S, B, X));
267     }
268     break;
269   case MAT_FACTOR_SCHUR_INVERTED:
270     sizesol = mumps->id.nrhs * mumps->id.size_schur;
271     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
272       PetscCall(PetscFree(mumps->schur_sol));
273       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
274       mumps->schur_sizesol = sizesol;
275     }
276     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
277     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
279     PetscCall(MatBindToCPU(X, S->boundtocpu));
280 #endif
281     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
282     if (!mumps->id.ICNTL(9)) { /* transpose solve */
283       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
284     } else {
285       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
286     }
287     PetscCall(MatProductSetFromOptions(X));
288     PetscCall(MatProductSymbolic(X));
289     PetscCall(MatProductNumeric(X));
290 
291     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
292     break;
293   default:
294     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
295   }
296   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
297   PetscCall(MatDestroy(&B));
298   PetscCall(MatDestroy(&X));
299   PetscFunctionReturn(0);
300 }
301 
302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
303 {
304   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
305 
306   PetscFunctionBegin;
307   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
308     PetscFunctionReturn(0);
309   }
310   if (!expansion) { /* prepare for the condensation step */
311     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
312     /* allocate MUMPS internal array to store reduced right-hand sides */
313     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
314       PetscCall(PetscFree(mumps->id.redrhs));
315       mumps->id.lredrhs = mumps->id.size_schur;
316       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
317       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
318     }
319     mumps->id.ICNTL(26) = 1; /* condensation phase */
320   } else {                   /* prepare for the expansion step */
321     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
322     PetscCall(MatMumpsSolveSchur_Private(F));
323     mumps->id.ICNTL(26) = 2; /* expansion phase */
324     PetscMUMPS_c(mumps);
325     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
326     /* restore defaults */
327     mumps->id.ICNTL(26) = -1;
328     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
329     if (mumps->id.nrhs > 1) {
330       PetscCall(PetscFree(mumps->id.redrhs));
331       mumps->id.lredrhs = 0;
332       mumps->sizeredrhs = 0;
333     }
334   }
335   PetscFunctionReturn(0);
336 }
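/* Sketch of how the solve routines below drive this helper: condensation before the MUMPS solve,
   expansion (which also solves the Schur system) afterwards:

     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // condensation: ICNTL(26)=1, allocate id.redrhs
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));  // solve the Schur system, then ICNTL(26)=2 expansion
*/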
337 
338 /*
339   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
340 
341   input:
342     A       - matrix in aij,baij or sbaij format
343     shift   - 0: C style output triple; 1: Fortran style output triple.
344     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
345               MAT_REUSE_MATRIX:   only the values in v array are updated
346   output:
347     nnz     - dim of r, c, and v (number of local nonzero entries of A)
348     r, c, v - row and col index, matrix values (matrix triples)
349 
350   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() they are then
351   freed with PetscFree(mumps->irn). This is not ideal: because v is only sometimes part of mumps->irn, the
352   PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353 
354  */
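/* A small worked example: for the 2x2 sequential AIJ matrix
     [ 2 3 ]
     [ 0 5 ]
   with shift = 1 (Fortran-style indexing, as MUMPS expects), the aij routine below produces
     irn = {1, 1, 2},  jcn = {1, 2, 2},  val = {2.0, 3.0, 5.0},  nnz = 3
   i.e. one (row, col, value) triple per stored nonzero, in the row-major order of the AIJ storage. */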
355 
356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
357 {
358   const PetscScalar *av;
359   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
360   PetscInt64         nz, rnz, i, j, k;
361   PetscMUMPSInt     *row, *col;
362   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatSeqAIJGetArrayRead(A, &av));
366   mumps->val = (PetscScalar *)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz = aa->nz;
369     ai = aa->i;
370     aj = aa->j;
371     PetscCall(PetscMalloc2(nz, &row, nz, &col));
372     for (i = k = 0; i < M; i++) {
373       rnz = ai[i + 1] - ai[i];
374       ajj = aj + ai[i];
375       for (j = 0; j < rnz; j++) {
376         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
377         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
390 {
391   PetscInt64     nz, i, j, k, r;
392   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
393   PetscMUMPSInt *row, *col;
394 
395   PetscFunctionBegin;
396   mumps->val = a->val;
397   if (reuse == MAT_INITIAL_MATRIX) {
398     nz = a->sliidx[a->totalslices];
399     PetscCall(PetscMalloc2(nz, &row, nz, &col));
400     for (i = k = 0; i < a->totalslices; i++) {
401       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
402     }
403     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
404     mumps->irn = row;
405     mumps->jcn = col;
406     mumps->nnz = nz;
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
412 {
413   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
414   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
415   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
416   PetscInt        bs;
417   PetscMUMPSInt  *row, *col;
418 
419   PetscFunctionBegin;
420   PetscCall(MatGetBlockSize(A, &bs));
421   M          = A->rmap->N / bs;
422   mumps->val = aa->a;
423   if (reuse == MAT_INITIAL_MATRIX) {
424     ai = aa->i;
425     aj = aa->j;
426     nz = bs2 * aa->nz;
427     PetscCall(PetscMalloc2(nz, &row, nz, &col));
428     for (i = 0; i < M; i++) {
429       ajj = aj + ai[i];
430       rnz = ai[i + 1] - ai[i];
431       for (k = 0; k < rnz; k++) {
432         for (j = 0; j < bs; j++) {
433           for (m = 0; m < bs; m++) {
434             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
435             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
436             idx++;
437           }
438         }
439       }
440     }
441     mumps->irn = row;
442     mumps->jcn = col;
443     mumps->nnz = nz;
444   }
445   PetscFunctionReturn(0);
446 }
447 
448 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
449 {
450   const PetscInt *ai, *aj, *ajj;
451   PetscInt        bs;
452   PetscInt64      nz, rnz, i, j, k, m;
453   PetscMUMPSInt  *row, *col;
454   PetscScalar    *val;
455   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
456   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
457 #if defined(PETSC_USE_COMPLEX)
458   PetscBool isset, hermitian;
459 #endif
460 
461   PetscFunctionBegin;
462 #if defined(PETSC_USE_COMPLEX)
463   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
464   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
465 #endif
466   ai = aa->i;
467   aj = aa->j;
468   PetscCall(MatGetBlockSize(A, &bs));
469   if (reuse == MAT_INITIAL_MATRIX) {
470     nz = aa->nz;
471     PetscCall(PetscMalloc2(bs2 * nz, &row, bs2 * nz, &col));
472     if (bs > 1) {
473       PetscCall(PetscMalloc1(bs2 * nz, &mumps->val_alloc));
474       mumps->val = mumps->val_alloc;
475     } else {
476       mumps->val = aa->a;
477     }
478     mumps->irn = row;
479     mumps->jcn = col;
480   } else {
481     if (bs == 1) mumps->val = aa->a;
482     row = mumps->irn;
483     col = mumps->jcn;
484   }
485   val = mumps->val;
486 
487   nz = 0;
488   if (bs > 1) {
489     for (i = 0; i < mbs; i++) {
490       rnz = ai[i + 1] - ai[i];
491       ajj = aj + ai[i];
492       for (j = 0; j < rnz; j++) {
493         for (k = 0; k < bs; k++) {
494           for (m = 0; m < bs; m++) {
495             if (ajj[j] > i || k >= m) {
496               if (reuse == MAT_INITIAL_MATRIX) {
497                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
498                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
499               }
500               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
501             }
502           }
503         }
504       }
505     }
506   } else if (reuse == MAT_INITIAL_MATRIX) {
507     for (i = 0; i < mbs; i++) {
508       rnz = ai[i + 1] - ai[i];
509       ajj = aj + ai[i];
510       for (j = 0; j < rnz; j++) {
511         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
512         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
513         nz++;
514       }
515     }
516     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
517   }
518   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
523 {
524   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
525   PetscInt64         nz, rnz, i, j;
526   const PetscScalar *av, *v1;
527   PetscScalar       *val;
528   PetscMUMPSInt     *row, *col;
529   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
530   PetscBool          missing;
531 #if defined(PETSC_USE_COMPLEX)
532   PetscBool hermitian, isset;
533 #endif
534 
535   PetscFunctionBegin;
536 #if defined(PETSC_USE_COMPLEX)
537   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
538   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
539 #endif
540   PetscCall(MatSeqAIJGetArrayRead(A, &av));
541   ai    = aa->i;
542   aj    = aa->j;
543   adiag = aa->diag;
544   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
545   if (reuse == MAT_INITIAL_MATRIX) {
546     /* count nz in the upper triangular part of A */
547     nz = 0;
548     if (missing) {
549       for (i = 0; i < M; i++) {
550         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
551           for (j = ai[i]; j < ai[i + 1]; j++) {
552             if (aj[j] < i) continue;
553             nz++;
554           }
555         } else {
556           nz += ai[i + 1] - adiag[i];
557         }
558       }
559     } else {
560       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
561     }
562     PetscCall(PetscMalloc2(nz, &row, nz, &col));
563     PetscCall(PetscMalloc1(nz, &val));
564     mumps->nnz = nz;
565     mumps->irn = row;
566     mumps->jcn = col;
567     mumps->val = mumps->val_alloc = val;
568 
569     nz = 0;
570     if (missing) {
571       for (i = 0; i < M; i++) {
572         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
573           for (j = ai[i]; j < ai[i + 1]; j++) {
574             if (aj[j] < i) continue;
575             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
576             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
577             val[nz] = av[j];
578             nz++;
579           }
580         } else {
581           rnz = ai[i + 1] - adiag[i];
582           ajj = aj + adiag[i];
583           v1  = av + adiag[i];
584           for (j = 0; j < rnz; j++) {
585             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
586             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
587             val[nz++] = v1[j];
588           }
589         }
590       }
591     } else {
592       for (i = 0; i < M; i++) {
593         rnz = ai[i + 1] - adiag[i];
594         ajj = aj + adiag[i];
595         v1  = av + adiag[i];
596         for (j = 0; j < rnz; j++) {
597           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
598           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
599           val[nz++] = v1[j];
600         }
601       }
602     }
603   } else {
604     nz  = 0;
605     val = mumps->val;
606     if (missing) {
607       for (i = 0; i < M; i++) {
608         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
609           for (j = ai[i]; j < ai[i + 1]; j++) {
610             if (aj[j] < i) continue;
611             val[nz++] = av[j];
612           }
613         } else {
614           rnz = ai[i + 1] - adiag[i];
615           v1  = av + adiag[i];
616           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
617         }
618       }
619     } else {
620       for (i = 0; i < M; i++) {
621         rnz = ai[i + 1] - adiag[i];
622         v1  = av + adiag[i];
623         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
624       }
625     }
626   }
627   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
628   PetscFunctionReturn(0);
629 }
630 
631 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
632 {
633   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
634   PetscInt           bs;
635   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
636   PetscMUMPSInt     *row, *col;
637   const PetscScalar *av, *bv, *v1, *v2;
638   PetscScalar       *val;
639   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
640   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
641   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
642   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
643 #if defined(PETSC_USE_COMPLEX)
644   PetscBool hermitian, isset;
645 #endif
646 
647   PetscFunctionBegin;
648 #if defined(PETSC_USE_COMPLEX)
649   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
650   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
651 #endif
652   PetscCall(MatGetBlockSize(A, &bs));
653   rstart = A->rmap->rstart;
654   ai     = aa->i;
655   aj     = aa->j;
656   bi     = bb->i;
657   bj     = bb->j;
658   av     = aa->a;
659   bv     = bb->a;
660 
661   garray = mat->garray;
662 
663   if (reuse == MAT_INITIAL_MATRIX) {
664     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
665     PetscCall(PetscMalloc2(nz, &row, nz, &col));
666     PetscCall(PetscMalloc1(nz, &val));
667     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
668     mumps->irn = row;
669     mumps->jcn = col;
670     mumps->val = mumps->val_alloc = val;
671   } else {
672     val = mumps->val;
673   }
674 
675   jj   = 0;
676   irow = rstart;
677   for (i = 0; i < mbs; i++) {
678     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
679     countA = ai[i + 1] - ai[i];
680     countB = bi[i + 1] - bi[i];
681     bjj    = bj + bi[i];
682     v1     = av + ai[i] * bs2;
683     v2     = bv + bi[i] * bs2;
684 
685     if (bs > 1) {
686       /* A-part */
687       for (j = 0; j < countA; j++) {
688         for (k = 0; k < bs; k++) {
689           for (m = 0; m < bs; m++) {
690             if (rstart + ajj[j] * bs > irow || k >= m) {
691               if (reuse == MAT_INITIAL_MATRIX) {
692                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
693                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
694               }
695               val[jj++] = v1[j * bs2 + m + k * bs];
696             }
697           }
698         }
699       }
700 
701       /* B-part */
702       for (j = 0; j < countB; j++) {
703         for (k = 0; k < bs; k++) {
704           for (m = 0; m < bs; m++) {
705             if (reuse == MAT_INITIAL_MATRIX) {
706               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
707               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
708             }
709             val[jj++] = v2[j * bs2 + m + k * bs];
710           }
711         }
712       }
713     } else {
714       /* A-part */
715       for (j = 0; j < countA; j++) {
716         if (reuse == MAT_INITIAL_MATRIX) {
717           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
718           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
719         }
720         val[jj++] = v1[j];
721       }
722 
723       /* B-part */
724       for (j = 0; j < countB; j++) {
725         if (reuse == MAT_INITIAL_MATRIX) {
726           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
727           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
728         }
729         val[jj++] = v2[j];
730       }
731     }
732     irow += bs;
733   }
734   mumps->nnz = jj;
735   PetscFunctionReturn(0);
736 }
737 
738 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
739 {
740   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
741   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
742   PetscMUMPSInt     *row, *col;
743   const PetscScalar *av, *bv, *v1, *v2;
744   PetscScalar       *val;
745   Mat                Ad, Ao;
746   Mat_SeqAIJ        *aa;
747   Mat_SeqAIJ        *bb;
748 
749   PetscFunctionBegin;
750   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
751   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
752   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
753 
754   aa = (Mat_SeqAIJ *)(Ad)->data;
755   bb = (Mat_SeqAIJ *)(Ao)->data;
756   ai = aa->i;
757   aj = aa->j;
758   bi = bb->i;
759   bj = bb->j;
760 
761   rstart = A->rmap->rstart;
762 
763   if (reuse == MAT_INITIAL_MATRIX) {
764     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
765     PetscCall(PetscMalloc2(nz, &row, nz, &col));
766     PetscCall(PetscMalloc1(nz, &val));
767     mumps->nnz = nz;
768     mumps->irn = row;
769     mumps->jcn = col;
770     mumps->val = mumps->val_alloc = val;
771   } else {
772     val = mumps->val;
773   }
774 
775   jj   = 0;
776   irow = rstart;
777   for (i = 0; i < m; i++) {
778     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
779     countA = ai[i + 1] - ai[i];
780     countB = bi[i + 1] - bi[i];
781     bjj    = bj + bi[i];
782     v1     = av + ai[i];
783     v2     = bv + bi[i];
784 
785     /* A-part */
786     for (j = 0; j < countA; j++) {
787       if (reuse == MAT_INITIAL_MATRIX) {
788         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
789         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
790       }
791       val[jj++] = v1[j];
792     }
793 
794     /* B-part */
795     for (j = 0; j < countB; j++) {
796       if (reuse == MAT_INITIAL_MATRIX) {
797         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
798         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
799       }
800       val[jj++] = v2[j];
801     }
802     irow++;
803   }
804   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
805   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
806   PetscFunctionReturn(0);
807 }
808 
809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
810 {
811   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
812   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
813   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
814   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
815   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
816   const PetscInt     bs2 = mat->bs2;
817   PetscInt           bs;
818   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
819   PetscMUMPSInt     *row, *col;
820   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
821   PetscScalar       *val;
822 
823   PetscFunctionBegin;
824   PetscCall(MatGetBlockSize(A, &bs));
825   if (reuse == MAT_INITIAL_MATRIX) {
826     nz = bs2 * (aa->nz + bb->nz);
827     PetscCall(PetscMalloc2(nz, &row, nz, &col));
828     PetscCall(PetscMalloc1(nz, &val));
829     mumps->nnz = nz;
830     mumps->irn = row;
831     mumps->jcn = col;
832     mumps->val = mumps->val_alloc = val;
833   } else {
834     val = mumps->val;
835   }
836 
837   jj   = 0;
838   irow = rstart;
839   for (i = 0; i < mbs; i++) {
840     countA = ai[i + 1] - ai[i];
841     countB = bi[i + 1] - bi[i];
842     ajj    = aj + ai[i];
843     bjj    = bj + bi[i];
844     v1     = av + bs2 * ai[i];
845     v2     = bv + bs2 * bi[i];
846 
847     idx = 0;
848     /* A-part */
849     for (k = 0; k < countA; k++) {
850       for (j = 0; j < bs; j++) {
851         for (n = 0; n < bs; n++) {
852           if (reuse == MAT_INITIAL_MATRIX) {
853             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
854             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
855           }
856           val[jj++] = v1[idx++];
857         }
858       }
859     }
860 
861     idx = 0;
862     /* B-part */
863     for (k = 0; k < countB; k++) {
864       for (j = 0; j < bs; j++) {
865         for (n = 0; n < bs; n++) {
866           if (reuse == MAT_INITIAL_MATRIX) {
867             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
868             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
869           }
870           val[jj++] = v2[idx++];
871         }
872       }
873     }
874     irow += bs;
875   }
876   PetscFunctionReturn(0);
877 }
878 
879 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
880 {
881   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
882   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
883   PetscMUMPSInt     *row, *col;
884   const PetscScalar *av, *bv, *v1, *v2;
885   PetscScalar       *val;
886   Mat                Ad, Ao;
887   Mat_SeqAIJ        *aa;
888   Mat_SeqAIJ        *bb;
889 #if defined(PETSC_USE_COMPLEX)
890   PetscBool hermitian, isset;
891 #endif
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_COMPLEX)
895   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
896   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
897 #endif
898   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
899   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
900   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
901 
902   aa    = (Mat_SeqAIJ *)(Ad)->data;
903   bb    = (Mat_SeqAIJ *)(Ao)->data;
904   ai    = aa->i;
905   aj    = aa->j;
906   adiag = aa->diag;
907   bi    = bb->i;
908   bj    = bb->j;
909 
910   rstart = A->rmap->rstart;
911 
912   if (reuse == MAT_INITIAL_MATRIX) {
913     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
914     nzb = 0; /* num of upper triangular entries in mat->B */
915     for (i = 0; i < m; i++) {
916       nza += (ai[i + 1] - adiag[i]);
917       countB = bi[i + 1] - bi[i];
918       bjj    = bj + bi[i];
919       for (j = 0; j < countB; j++) {
920         if (garray[bjj[j]] > rstart) nzb++;
921       }
922     }
923 
924     nz = nza + nzb; /* total nz of upper triangular part of mat */
925     PetscCall(PetscMalloc2(nz, &row, nz, &col));
926     PetscCall(PetscMalloc1(nz, &val));
927     mumps->nnz = nz;
928     mumps->irn = row;
929     mumps->jcn = col;
930     mumps->val = mumps->val_alloc = val;
931   } else {
932     val = mumps->val;
933   }
934 
935   jj   = 0;
936   irow = rstart;
937   for (i = 0; i < m; i++) {
938     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
939     v1     = av + adiag[i];
940     countA = ai[i + 1] - adiag[i];
941     countB = bi[i + 1] - bi[i];
942     bjj    = bj + bi[i];
943     v2     = bv + bi[i];
944 
945     /* A-part */
946     for (j = 0; j < countA; j++) {
947       if (reuse == MAT_INITIAL_MATRIX) {
948         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
949         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
950       }
951       val[jj++] = v1[j];
952     }
953 
954     /* B-part */
955     for (j = 0; j < countB; j++) {
956       if (garray[bjj[j]] > rstart) {
957         if (reuse == MAT_INITIAL_MATRIX) {
958           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
959           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
960         }
961         val[jj++] = v2[j];
962       }
963     }
964     irow++;
965   }
966   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
967   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
968   PetscFunctionReturn(0);
969 }
970 
971 PetscErrorCode MatDestroy_MUMPS(Mat A)
972 {
973   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
974 
975   PetscFunctionBegin;
976   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
977   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
978   PetscCall(VecScatterDestroy(&mumps->scat_sol));
979   PetscCall(VecDestroy(&mumps->b_seq));
980   PetscCall(VecDestroy(&mumps->x_seq));
981   PetscCall(PetscFree(mumps->id.perm_in));
982   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
983   PetscCall(PetscFree(mumps->val_alloc));
984   PetscCall(PetscFree(mumps->info));
985   PetscCall(PetscFree(mumps->ICNTL_pre));
986   PetscCall(PetscFree(mumps->CNTL_pre));
987   PetscCall(MatMumpsResetSchur_Private(mumps));
988   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
989     mumps->id.job = JOB_END;
990     PetscMUMPS_c(mumps);
991     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
992     if (mumps->mumps_comm != MPI_COMM_NULL) {
993       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
994       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
995     }
996   }
997 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
998   if (mumps->use_petsc_omp_support) {
999     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1000     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1001     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1002   }
1003 #endif
1004   PetscCall(PetscFree(mumps->ia_alloc));
1005   PetscCall(PetscFree(mumps->ja_alloc));
1006   PetscCall(PetscFree(mumps->recvcount));
1007   PetscCall(PetscFree(mumps->reqs));
1008   PetscCall(PetscFree(mumps->irhs_loc));
1009   PetscCall(PetscFree(A->data));
1010 
1011   /* clear composed functions */
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1013   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1014   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1015   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1016   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1023   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1024   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1029 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1030 {
1031   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1032   const PetscMPIInt ompsize = mumps->omp_comm_size;
1033   PetscInt          i, m, M, rstart;
1034 
1035   PetscFunctionBegin;
1036   PetscCall(MatGetSize(A, &M, NULL));
1037   PetscCall(MatGetLocalSize(A, &m, NULL));
1038   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1039   if (ompsize == 1) {
1040     if (!mumps->irhs_loc) {
1041       mumps->nloc_rhs = m;
1042       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1043       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1044       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1045     }
1046     mumps->id.rhs_loc = (MumpsScalar *)array;
1047   } else {
1048 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1049     const PetscInt *ranges;
1050     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1051     MPI_Group       petsc_group, omp_group;
1052     PetscScalar    *recvbuf = NULL;
1053 
1054     if (mumps->is_omp_master) {
1055       /* Lazily initialize the omp stuff for distributed rhs */
1056       if (!mumps->irhs_loc) {
1057         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1058         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1059         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1060         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1061         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1062         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1063 
1064         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1065         mumps->nloc_rhs = 0;
1066         PetscCall(MatGetOwnershipRanges(A, &ranges));
1067         for (j = 0; j < ompsize; j++) {
1068           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1069           mumps->nloc_rhs += mumps->rhs_nrow[j];
1070         }
1071         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1072         for (j = k = 0; j < ompsize; j++) {
1073           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1074         }
1075 
1076         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1077         PetscCallMPI(MPI_Group_free(&petsc_group));
1078         PetscCallMPI(MPI_Group_free(&omp_group));
1079       }
1080 
1081       /* Realloc buffers when current nrhs is bigger than what we have met */
1082       if (nrhs > mumps->max_nrhs) {
1083         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1084         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1085         mumps->max_nrhs = nrhs;
1086       }
1087 
1088       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1089       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1090       mumps->rhs_disps[0] = 0;
1091       for (j = 1; j < ompsize; j++) {
1092         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1093         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1094       }
1095       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1096     }
1097 
1098     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1099     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1100 
1101     if (mumps->is_omp_master) {
1102       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1103         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1104         for (j = 0; j < ompsize; j++) {
1105           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1106           dst                    = dstbase;
1107           for (i = 0; i < nrhs; i++) {
1108             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1109             src += mumps->rhs_nrow[j];
1110             dst += mumps->nloc_rhs;
1111           }
1112           dstbase += mumps->rhs_nrow[j];
1113         }
1114       }
1115       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1116     }
1117 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1118   }
1119   mumps->id.nrhs     = nrhs;
1120   mumps->id.nloc_rhs = mumps->nloc_rhs;
1121   mumps->id.lrhs_loc = mumps->nloc_rhs;
1122   mumps->id.irhs_loc = mumps->irhs_loc;
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1127 {
1128   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1129   const PetscScalar *rarray = NULL;
1130   PetscScalar       *array;
1131   IS                 is_iden, is_petsc;
1132   PetscInt           i;
1133   PetscBool          second_solve = PETSC_FALSE;
1134   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1135 
1136   PetscFunctionBegin;
1137   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1138                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1139                                    &cite1));
1140   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1141                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1142                                    &cite2));
1143 
1144   if (A->factorerrortype) {
1145     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1146     PetscCall(VecSetInf(x));
1147     PetscFunctionReturn(0);
1148   }
1149 
1150   mumps->id.nrhs = 1;
1151   if (mumps->petsc_size > 1) {
1152     if (mumps->ICNTL20 == 10) {
1153       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1154       PetscCall(VecGetArrayRead(b, &rarray));
1155       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1156     } else {
1157       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1158       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1159       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1160       if (!mumps->myid) {
1161         PetscCall(VecGetArray(mumps->b_seq, &array));
1162         mumps->id.rhs = (MumpsScalar *)array;
1163       }
1164     }
1165   } else {                   /* petsc_size == 1 */
1166     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1167     PetscCall(VecCopy(b, x));
1168     PetscCall(VecGetArray(x, &array));
1169     mumps->id.rhs = (MumpsScalar *)array;
1170   }
1171 
1172   /*
1173      handle condensation step of Schur complement (if any)
1174      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1175      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1176      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1177      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1178   */
1179   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1180     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1181     second_solve = PETSC_TRUE;
1182     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1183   }
1184   /* solve phase */
1185   /*-------------*/
1186   mumps->id.job = JOB_SOLVE;
1187   PetscMUMPS_c(mumps);
1188   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1189 
1190   /* handle expansion step of Schur complement (if any) */
1191   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1192 
1193   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1194     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1195       /* when id.ICNTL(9) changes, the contents of sol_loc may change (but not its size lsol_loc), so recreate scat_sol */
1196       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1197     }
1198     if (!mumps->scat_sol) { /* create scatter scat_sol */
1199       PetscInt *isol2_loc = NULL;
1200       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1201       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1202       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1203       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1204       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1205       PetscCall(ISDestroy(&is_iden));
1206       PetscCall(ISDestroy(&is_petsc));
1207       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1208     }
1209 
1210     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1211     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1212   }
1213 
1214   if (mumps->petsc_size > 1) {
1215     if (mumps->ICNTL20 == 10) {
1216       PetscCall(VecRestoreArrayRead(b, &rarray));
1217     } else if (!mumps->myid) {
1218       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1219     }
1220   } else PetscCall(VecRestoreArray(x, &array));
1221 
1222   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1227 {
1228   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1229 
1230   PetscFunctionBegin;
1231   mumps->id.ICNTL(9) = 0;
1232   PetscCall(MatSolve_MUMPS(A, b, x));
1233   mumps->id.ICNTL(9) = 1;
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1238 {
1239   Mat                Bt = NULL;
1240   PetscBool          denseX, denseB, flg, flgT;
1241   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1242   PetscInt           i, nrhs, M;
1243   PetscScalar       *array;
1244   const PetscScalar *rbray;
1245   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1246   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1247   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1248   IS                 is_to, is_from;
1249   PetscInt           k, proc, j, m, myrstart;
1250   const PetscInt    *rstart;
1251   Vec                v_mpi, msol_loc;
1252   VecScatter         scat_sol;
1253   Vec                b_seq;
1254   VecScatter         scat_rhs;
1255   PetscScalar       *aa;
1256   PetscInt           spnr, *ia, *ja;
1257   Mat_MPIAIJ        *b = NULL;
1258 
1259   PetscFunctionBegin;
1260   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1261   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1262 
1263   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1264   if (denseB) {
1265     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1266     mumps->id.ICNTL(20) = 0; /* dense RHS */
1267   } else {                   /* sparse B */
1268     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1269     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1270     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1271                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1272       PetscCall(MatTransposeGetMat(B, &Bt));
1273     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1274     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1275   }
1276 
1277   PetscCall(MatGetSize(B, &M, &nrhs));
1278   mumps->id.nrhs = nrhs;
1279   mumps->id.lrhs = M;
1280   mumps->id.rhs  = NULL;
1281 
1282   if (mumps->petsc_size == 1) {
1283     PetscScalar *aa;
1284     PetscInt     spnr, *ia, *ja;
1285     PetscBool    second_solve = PETSC_FALSE;
1286 
1287     PetscCall(MatDenseGetArray(X, &array));
1288     mumps->id.rhs = (MumpsScalar *)array;
1289 
1290     if (denseB) {
1291       /* copy B to X */
1292       PetscCall(MatDenseGetArrayRead(B, &rbray));
1293       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1294       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1295     } else { /* sparse B */
1296       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1297       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1298       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1299       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1300       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1301     }
1302     /* handle condensation step of Schur complement (if any) */
1303     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1304       second_solve = PETSC_TRUE;
1305       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1306     }
1307     /* solve phase */
1308     /*-------------*/
1309     mumps->id.job = JOB_SOLVE;
1310     PetscMUMPS_c(mumps);
1311     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1312 
1313     /* handle expansion step of Schur complement (if any) */
1314     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1315     if (!denseB) { /* sparse B */
1316       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1317       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1318       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1319     }
1320     PetscCall(MatDenseRestoreArray(X, &array));
1321     PetscFunctionReturn(0);
1322   }
1323 
1324   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1325   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1326 
1327   /* create msol_loc to hold mumps local solution */
1328   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1329   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1330 
1331   lsol_loc  = mumps->id.lsol_loc;
1332   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1333   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1334   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1335   mumps->id.isol_loc = isol_loc;
1336 
1337   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1338 
1339   if (denseB) {
1340     if (mumps->ICNTL20 == 10) {
1341       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1342       PetscCall(MatDenseGetArrayRead(B, &rbray));
1343       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1344       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1345       PetscCall(MatGetLocalSize(B, &m, NULL));
1346       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1347     } else {
1348       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1349       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented as an MPI_Gather,
1350         which results in very inefficient communication. An optimization would be to use VecScatterCreateToZero to gather B on rank 0
1351         and then, on rank 0, re-arrange B into the desired order, which is a local operation.
1352       */
1353 
1354       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1355       /* wrap dense rhs matrix B into a vector v_mpi */
1356       PetscCall(MatGetLocalSize(B, &m, NULL));
1357       PetscCall(MatDenseGetArray(B, &bray));
1358       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1359       PetscCall(MatDenseRestoreArray(B, &bray));
1360 
1361       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1362       if (!mumps->myid) {
1363         PetscInt *idx;
1364         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1365         PetscCall(PetscMalloc1(nrhs * M, &idx));
1366         PetscCall(MatGetOwnershipRanges(B, &rstart));
1367         k = 0;
1368         for (proc = 0; proc < mumps->petsc_size; proc++) {
1369           for (j = 0; j < nrhs; j++) {
1370             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1371           }
1372         }
1373 
1374         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1375         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1376         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1377       } else {
1378         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1379         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1380         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1381       }
1382       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1383       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1384       PetscCall(ISDestroy(&is_to));
1385       PetscCall(ISDestroy(&is_from));
1386       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1387 
1388       if (!mumps->myid) { /* define rhs on the host */
1389         PetscCall(VecGetArray(b_seq, &bray));
1390         mumps->id.rhs = (MumpsScalar *)bray;
1391         PetscCall(VecRestoreArray(b_seq, &bray));
1392       }
1393     }
1394   } else { /* sparse B */
1395     b = (Mat_MPIAIJ *)Bt->data;
1396 
1397     /* wrap dense X into a vector v_mpi */
1398     PetscCall(MatGetLocalSize(X, &m, NULL));
1399     PetscCall(MatDenseGetArray(X, &bray));
1400     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1401     PetscCall(MatDenseRestoreArray(X, &bray));
1402 
1403     if (!mumps->myid) {
1404       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1405       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1406       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1407       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1408       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1409     } else {
1410       mumps->id.irhs_ptr    = NULL;
1411       mumps->id.irhs_sparse = NULL;
1412       mumps->id.nz_rhs      = 0;
1413       mumps->id.rhs_sparse  = NULL;
1414     }
1415   }
1416 
1417   /* solve phase */
1418   /*-------------*/
1419   mumps->id.job = JOB_SOLVE;
1420   PetscMUMPS_c(mumps);
1421   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1422 
1423   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1424   PetscCall(MatDenseGetArray(X, &array));
1425   PetscCall(VecPlaceArray(v_mpi, array));
1426 
1427   /* create scatter scat_sol */
1428   PetscCall(MatGetOwnershipRanges(X, &rstart));
1429   /* iidx: index for scatter mumps solution to petsc X */
1430 
1431   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1432   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1433   for (i = 0; i < lsol_loc; i++) {
1434     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1435 
1436     for (proc = 0; proc < mumps->petsc_size; proc++) {
1437       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1438         myrstart = rstart[proc];
1439         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1440         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1441         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1442         break;
1443       }
1444     }
1445 
1446     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1447   }
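       /* idxx[i + j*lsol_loc] is the position within v_mpi (i.e., within X, stored column-by-column inside each
          rank's row block) of the entry MUMPS returned in sol_loc[i + j*lsol_loc] */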
1448   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1449   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1450   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1451   PetscCall(ISDestroy(&is_from));
1452   PetscCall(ISDestroy(&is_to));
1453   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1454   PetscCall(MatDenseRestoreArray(X, &array));
1455 
1456   /* free spaces */
1457   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1458   mumps->id.isol_loc = isol_loc_save;
1459 
1460   PetscCall(PetscFree2(sol_loc, isol_loc));
1461   PetscCall(PetscFree(idxx));
1462   PetscCall(VecDestroy(&msol_loc));
1463   PetscCall(VecDestroy(&v_mpi));
1464   if (!denseB) {
1465     if (!mumps->myid) {
1466       b = (Mat_MPIAIJ *)Bt->data;
1467       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1468       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1469       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1470     }
1471   } else {
1472     if (mumps->ICNTL20 == 0) {
1473       PetscCall(VecDestroy(&b_seq));
1474       PetscCall(VecScatterDestroy(&scat_rhs));
1475     }
1476   }
1477   PetscCall(VecScatterDestroy(&scat_sol));
1478   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1483 {
1484   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1485   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1486 
1487   PetscFunctionBegin;
1488   mumps->id.ICNTL(9) = 0;
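       /* any ICNTL(9) value other than 1 makes MUMPS solve with the transpose of A; the saved value is restored below */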
1489   PetscCall(MatMatSolve_MUMPS(A, B, X));
1490   mumps->id.ICNTL(9) = oldvalue;
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1495 {
1496   PetscBool flg;
1497   Mat       B;
1498 
1499   PetscFunctionBegin;
1500   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1501   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1502 
1503   /* Create B=Bt^T that uses Bt's data structure */
1504   PetscCall(MatCreateTranspose(Bt, &B));
1505 
1506   PetscCall(MatMatSolve_MUMPS(A, B, X));
1507   PetscCall(MatDestroy(&B));
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 #if !defined(PETSC_USE_COMPLEX)
1512 /*
1513   input:
1514    F:        numeric factor
1515   output:
1516    nneg:     total number of negative pivots
1517    nzero:    total number of zero pivots
1518    npos:     (global dimension of F) - nneg - nzero
1519 */
1520 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1521 {
1522   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1523   PetscMPIInt size;
1524 
1525   PetscFunctionBegin;
1526   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1527   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1528   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1529 
1530   if (nneg) *nneg = mumps->id.INFOG(12);
1531   if (nzero || npos) {
1532     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1533     if (nzero) *nzero = mumps->id.INFOG(28);
1534     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1535   }
1536   PetscFunctionReturn(0);
1537 }
1538 #endif
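
/* Usage sketch (for illustration only; not part of this file's interface): after a Cholesky factorization with
   MATSOLVERMUMPS and the options -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1, a user would typically query the
   inertia through the generic entry point

     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));

   which dispatches to MatGetInertia_SBAIJMUMPS() above via F->ops->getinertia. */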
1539 
1540 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1541 {
1542   PetscInt       i, nreqs;
1543   PetscMUMPSInt *irn, *jcn;
1544   PetscMPIInt    count;
1545   PetscInt64     totnnz, remain;
1546   const PetscInt osize = mumps->omp_comm_size;
1547   PetscScalar   *val;
1548 
1549   PetscFunctionBegin;
1550   if (osize > 1) {
1551     if (reuse == MAT_INITIAL_MATRIX) {
1552       /* master first gathers counts of nonzeros to receive */
1553       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1554       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1555 
1556       /* Then each computes number of send/recvs */
1557       if (mumps->is_omp_master) {
1558         /* Start from 1 since self communication is not done in MPI */
1559         nreqs = 0;
1560         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1561       } else {
1562         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1563       }
1564       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1565 
1566       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1567          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1568          might be a prime number > 2^31-1, we have to slice the messages. Note that omp_comm_size
1569          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1570        */
1571       nreqs = 0; /* counter for actual send/recvs */
1572       if (mumps->is_omp_master) {
1573         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1574         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1575         PetscCall(PetscMalloc1(totnnz, &val));
1576 
1577         /* Self communication */
1578         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1579         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1580         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1581 
1582         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1583         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1584         PetscCall(PetscFree(mumps->val_alloc));
1585         mumps->nnz = totnnz;
1586         mumps->irn = irn;
1587         mumps->jcn = jcn;
1588         mumps->val = mumps->val_alloc = val;
1589 
1590         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1591         jcn += mumps->recvcount[0];
1592         val += mumps->recvcount[0];
1593 
1594         /* Remote communication */
1595         for (i = 1; i < osize; i++) {
1596           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1597           remain = mumps->recvcount[i] - count;
1598           while (count > 0) {
1599             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1600             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1601             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1602             irn += count;
1603             jcn += count;
1604             val += count;
1605             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1606             remain -= count;
1607           }
1608         }
1609       } else {
1610         irn    = mumps->irn;
1611         jcn    = mumps->jcn;
1612         val    = mumps->val;
1613         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1614         remain = mumps->nnz - count;
1615         while (count > 0) {
1616           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1617           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1618           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1619           irn += count;
1620           jcn += count;
1621           val += count;
1622           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1623           remain -= count;
1624         }
1625       }
1626     } else {
1627       nreqs = 0;
1628       if (mumps->is_omp_master) {
1629         val = mumps->val + mumps->recvcount[0];
1630         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1631           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1632           remain = mumps->recvcount[i] - count;
1633           while (count > 0) {
1634             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1635             val += count;
1636             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1637             remain -= count;
1638           }
1639         }
1640       } else {
1641         val    = mumps->val;
1642         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1643         remain = mumps->nnz - count;
1644         while (count > 0) {
1645           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1646           val += count;
1647           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1648           remain -= count;
1649         }
1650       }
1651     }
1652     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1653     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1659 {
1660   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1661   PetscBool  isMPIAIJ;
1662 
1663   PetscFunctionBegin;
1664   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip numeric factorization when analysis failed, except for the empty-matrix case */
1665     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1666     PetscCall(PetscInfo(A, "MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1671   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1672 
1673   /* numerical factorization phase */
1674   /*-------------------------------*/
1675   mumps->id.job = JOB_FACTNUMERIC;
1676   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1678   } else {
1679     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1680   }
1681   PetscMUMPS_c(mumps);
1682   if (mumps->id.INFOG(1) < 0) {
1683     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1684     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1685       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1686       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1687     } else if (mumps->id.INFOG(1) == -13) {
1688       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1689       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1690     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1691       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1693     } else {
1694       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1695       F->factorerrortype = MAT_FACTOR_OTHER;
1696     }
1697   }
1698   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "mumps->id.ICNTL(16) = %d", mumps->id.ICNTL(16));
1699 
1700   F->assembled = PETSC_TRUE;
1701 
1702   if (F->schur) { /* reset Schur status to unfactored */
1703 #if defined(PETSC_HAVE_CUDA)
1704     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1705 #endif
1706     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1707       mumps->id.ICNTL(19) = 2;
1708       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1709     }
1710     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1711   }
1712 
1713   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1714   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1715 
1716   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1717   if (mumps->petsc_size > 1) {
1718     PetscInt     lsol_loc;
1719     PetscScalar *sol_loc;
1720 
1721     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1722 
1723     /* distributed solution; Create x_seq=sol_loc for repeated use */
1724     if (mumps->x_seq) {
1725       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1726       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1727       PetscCall(VecDestroy(&mumps->x_seq));
1728     }
1729     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1730     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1731     mumps->id.lsol_loc = lsol_loc;
1732     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1733     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1734   }
1735   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 /* Sets MUMPS options from the options database */
1740 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1741 {
1742   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1743   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1744   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1745   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1746   MumpsScalar  *arr;
1747 
1748   PetscFunctionBegin;
1749   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1750   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1751     PetscInt nthreads   = 0;
1752     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1753     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1754 
1755     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1756     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1757     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1758 
1759     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1760     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1761     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1762     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1763     if (mumps->use_petsc_omp_support) {
1764       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1765                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1766       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1767 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1768       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1769       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1770 #endif
1771     } else {
1772       mumps->omp_comm      = PETSC_COMM_SELF;
1773       mumps->mumps_comm    = mumps->petsc_comm;
1774       mumps->is_omp_master = PETSC_TRUE;
1775     }
1776     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1777     mumps->reqs = NULL;
1778     mumps->tag  = 0;
1779 
1780     if (mumps->mumps_comm != MPI_COMM_NULL) {
1781       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1782         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1783         MPI_Comm comm;
1784         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1785         mumps->mumps_comm = comm;
1786       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1787     }
1788 
1789     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
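         /* MUMPS expects a Fortran communicator handle, hence the MPI_Comm_c2f() conversion above */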
1790     mumps->id.job          = JOB_INIT;
1791     mumps->id.par          = 1; /* host participates in factorization and solve */
1792     mumps->id.sym          = mumps->sym;
1793 
1794     size          = mumps->id.size_schur;
1795     arr           = mumps->id.schur;
1796     listvar_schur = mumps->id.listvar_schur;
1797     PetscMUMPS_c(mumps);
1798     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1799     /* restore cached ICNTL and CNTL values */
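         /* ICNTL_pre/CNTL_pre hold the number of cached entries in slot 0, followed by (index, value) pairs that
            were recorded before MUMPS was initialized */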
1800     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1801     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1802     PetscCall(PetscFree(mumps->ICNTL_pre));
1803     PetscCall(PetscFree(mumps->CNTL_pre));
1804 
1805     if (schur) {
1806       mumps->id.size_schur    = size;
1807       mumps->id.schur_lld     = size;
1808       mumps->id.schur         = arr;
1809       mumps->id.listvar_schur = listvar_schur;
1810       if (mumps->petsc_size > 1) {
1811         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1812 
1813         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1814         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1815         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1816         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1817       } else {
1818         if (F->factortype == MAT_FACTOR_LU) {
1819           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1820         } else {
1821           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1822         }
1823       }
1824       mumps->id.ICNTL(26) = -1;
1825     }
1826 
1827     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1828        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1829      */
1830     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1831     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1832 
1833     mumps->scat_rhs = NULL;
1834     mumps->scat_sol = NULL;
1835 
1836     /* set PETSc-MUMPS default options - override MUMPS default */
1837     mumps->id.ICNTL(3) = 0;
1838     mumps->id.ICNTL(4) = 0;
1839     if (mumps->petsc_size == 1) {
1840       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1841       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1842     } else {
1843       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1844       mumps->id.ICNTL(21) = 1; /* distributed solution */
1845     }
1846   }
1847   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1848   if (flg) mumps->id.ICNTL(1) = icntl;
1849   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1850   if (flg) mumps->id.ICNTL(2) = icntl;
1851   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1852   if (flg) mumps->id.ICNTL(3) = icntl;
1853 
1854   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1855   if (flg) mumps->id.ICNTL(4) = icntl;
1856   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1857 
1858   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1859   if (flg) mumps->id.ICNTL(6) = icntl;
1860 
1861   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1862   if (flg) {
1863     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1864     mumps->id.ICNTL(7) = icntl;
1865   }
1866 
1867   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1868   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1869   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1870   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1871   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1872   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1873   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1874   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1875   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
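       /* the negative value passes the (uniform) PETSc block size to MUMPS' input-matrix compression, ICNTL(15);
          this is believed to match MUMPS' convention that ICNTL(15) < 0 means all blocks have size -ICNTL(15) */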
1876   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1877   if (flg) {
1878     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1879     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1880   }
1881   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1882   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1883     PetscCall(MatDestroy(&F->schur));
1884     PetscCall(MatMumpsResetSchur_Private(mumps));
1885   }
1886 
1887   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1888      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1889      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1890      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1891      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1892      In short, we could not use distributed RHS with MPICH until v4.0b1.
1893    */
1894 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1895   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1896 #else
1897   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1898 #endif
1899   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1900   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1901 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1902   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1903 #endif
1904   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1905 
1906   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1907   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1909   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1910 
1911   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix is present", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1915   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1916   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1917   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1918   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1922   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1923 
1924   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1927   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1928   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1929   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1930 
1931   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1932 
1933   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1934   if (ninfo) {
1935     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1936     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1937     mumps->ninfo = ninfo;
1938     for (i = 0; i < ninfo; i++) {
1939       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1940       mumps->info[i] = info[i];
1941     }
1942   }
1943   PetscOptionsEnd();
1944   PetscFunctionReturn(0);
1945 }
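
/* Illustrative command line for the options registered above (assuming an application that goes through KSP/PC and
   selects this package as the factorization solver; the -mat_mumps_* names are exactly the ones handled in
   MatSetFromOptions_MUMPS()):

     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
*/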
1946 
1947 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1948 {
1949   PetscFunctionBegin;
1950   if (mumps->id.INFOG(1) < 0) {
1951     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1952     if (mumps->id.INFOG(1) == -6) {
1953       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1954       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1955     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1956       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1957       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1958     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1) = -16 with INFO(2) = 0 indicates an empty matrix */
1959       PetscCall(PetscInfo(F, "Empty matrix\n"));
1960     } else {
1961       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1962       F->factorerrortype = MAT_FACTOR_OTHER;
1963     }
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1969 {
1970   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1971   Vec            b;
1972   const PetscInt M = A->rmap->N;
1973 
1974   PetscFunctionBegin;
1975   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1976     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1977     PetscFunctionReturn(0);
1978   }
1979 
1980   /* Set MUMPS options from the options database */
1981   PetscCall(MatSetFromOptions_MUMPS(F, A));
1982 
1983   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1984   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1985 
1986   /* analysis phase */
1987   /*----------------*/
1988   mumps->id.job = JOB_FACTSYMBOLIC;
1989   mumps->id.n   = M;
1990   switch (mumps->id.ICNTL(18)) {
1991   case 0: /* centralized assembled matrix input */
1992     if (!mumps->myid) {
1993       mumps->id.nnz = mumps->nnz;
1994       mumps->id.irn = mumps->irn;
1995       mumps->id.jcn = mumps->jcn;
1996       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1997       if (r) {
1998         mumps->id.ICNTL(7) = 1;
1999         if (!mumps->myid) {
2000           const PetscInt *idx;
2001           PetscInt        i;
2002 
2003           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2004           PetscCall(ISGetIndices(r, &idx));
2005           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2006           PetscCall(ISRestoreIndices(r, &idx));
2007         }
2008       }
2009     }
2010     break;
2011   case 3: /* distributed assembled matrix input (size>1) */
2012     mumps->id.nnz_loc = mumps->nnz;
2013     mumps->id.irn_loc = mumps->irn;
2014     mumps->id.jcn_loc = mumps->jcn;
2015     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2016     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2017       PetscCall(MatCreateVecs(A, NULL, &b));
2018       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2019       PetscCall(VecDestroy(&b));
2020     }
2021     break;
2022   }
2023   PetscMUMPS_c(mumps);
2024   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2025 
2026   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2027   F->ops->solve             = MatSolve_MUMPS;
2028   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2029   F->ops->matsolve          = MatMatSolve_MUMPS;
2030   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2031   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2032 
2033   mumps->matstruc = SAME_NONZERO_PATTERN;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 /* Note the Petsc r and c permutations are ignored */
2038 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2039 {
2040   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2041   Vec            b;
2042   const PetscInt M = A->rmap->N;
2043 
2044   PetscFunctionBegin;
2045   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2046     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2047     PetscFunctionReturn(0);
2048   }
2049 
2050   /* Set MUMPS options from the options database */
2051   PetscCall(MatSetFromOptions_MUMPS(F, A));
2052 
2053   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2054   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2055 
2056   /* analysis phase */
2057   /*----------------*/
2058   mumps->id.job = JOB_FACTSYMBOLIC;
2059   mumps->id.n   = M;
2060   switch (mumps->id.ICNTL(18)) {
2061   case 0: /* centralized assembled matrix input */
2062     if (!mumps->myid) {
2063       mumps->id.nnz = mumps->nnz;
2064       mumps->id.irn = mumps->irn;
2065       mumps->id.jcn = mumps->jcn;
2066       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2067     }
2068     break;
2069   case 3: /* distributed assembled matrix input (size>1) */
2070     mumps->id.nnz_loc = mumps->nnz;
2071     mumps->id.irn_loc = mumps->irn;
2072     mumps->id.jcn_loc = mumps->jcn;
2073     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2074     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2075       PetscCall(MatCreateVecs(A, NULL, &b));
2076       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2077       PetscCall(VecDestroy(&b));
2078     }
2079     break;
2080   }
2081   PetscMUMPS_c(mumps);
2082   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2083 
2084   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2085   F->ops->solve             = MatSolve_MUMPS;
2086   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2087   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2088 
2089   mumps->matstruc = SAME_NONZERO_PATTERN;
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 /* Note the Petsc r permutation and factor info are ignored */
2094 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2095 {
2096   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2097   Vec            b;
2098   const PetscInt M = A->rmap->N;
2099 
2100   PetscFunctionBegin;
2101   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2102     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2103     PetscFunctionReturn(0);
2104   }
2105 
2106   /* Set MUMPS options from the options database */
2107   PetscCall(MatSetFromOptions_MUMPS(F, A));
2108 
2109   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2110   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2111 
2112   /* analysis phase */
2113   /*----------------*/
2114   mumps->id.job = JOB_FACTSYMBOLIC;
2115   mumps->id.n   = M;
2116   switch (mumps->id.ICNTL(18)) {
2117   case 0: /* centralized assembled matrix input */
2118     if (!mumps->myid) {
2119       mumps->id.nnz = mumps->nnz;
2120       mumps->id.irn = mumps->irn;
2121       mumps->id.jcn = mumps->jcn;
2122       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2123     }
2124     break;
2125   case 3: /* distributed assembled matrix input (size>1) */
2126     mumps->id.nnz_loc = mumps->nnz;
2127     mumps->id.irn_loc = mumps->irn;
2128     mumps->id.jcn_loc = mumps->jcn;
2129     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2130     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2131       PetscCall(MatCreateVecs(A, NULL, &b));
2132       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2133       PetscCall(VecDestroy(&b));
2134     }
2135     break;
2136   }
2137   PetscMUMPS_c(mumps);
2138   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2139 
2140   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2141   F->ops->solve                 = MatSolve_MUMPS;
2142   F->ops->solvetranspose        = MatSolve_MUMPS;
2143   F->ops->matsolve              = MatMatSolve_MUMPS;
2144   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2145   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2146 #if defined(PETSC_USE_COMPLEX)
2147   F->ops->getinertia = NULL;
2148 #else
2149   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2150 #endif
2151 
2152   mumps->matstruc = SAME_NONZERO_PATTERN;
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2157 {
2158   PetscBool         iascii;
2159   PetscViewerFormat format;
2160   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2161 
2162   PetscFunctionBegin;
2163   /* check if matrix is mumps type */
2164   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2165 
2166   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2167   if (iascii) {
2168     PetscCall(PetscViewerGetFormat(viewer, &format));
2169     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2170       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2171       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2172         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2173         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2174         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2175         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2176         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2177         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2178         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2179         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2180         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering): %d\n", mumps->id.ICNTL(7)));
2181         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2182         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2183         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2184         if (mumps->id.ICNTL(11) > 0) {
2185           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2186           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2187           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2188           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2189           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2190           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2191         }
2192         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2193         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2194         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2195         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2196         /* ICNTL(15-17) not used */
2197         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2198         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2199         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2200         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2201         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2202         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));
2203 
2204         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2205         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2206         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2207         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2208         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2209         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2210 
2211         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2212         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors that may be discarded during factorization): %d\n", mumps->id.ICNTL(31)));
2213         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2214         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2215         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2216         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2217 
2218         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2219         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2220         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2221         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2222         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2223         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2224 
2225         /* information local to each processor */
2226         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2227         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2228         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2229         PetscCall(PetscViewerFlush(viewer));
2230         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2231         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2232         PetscCall(PetscViewerFlush(viewer));
2233         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2234         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2235         PetscCall(PetscViewerFlush(viewer));
2236 
2237         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2238         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2239         PetscCall(PetscViewerFlush(viewer));
2240 
2241         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2242         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2243         PetscCall(PetscViewerFlush(viewer));
2244 
2245         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2246         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2247         PetscCall(PetscViewerFlush(viewer));
2248 
2249         if (mumps->ninfo && mumps->ninfo <= 80) {
2250           PetscInt i;
2251           for (i = 0; i < mumps->ninfo; i++) {
2252             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2253             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2254             PetscCall(PetscViewerFlush(viewer));
2255           }
2256         }
2257         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2258       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2259 
2260       if (mumps->myid == 0) { /* information from the host */
2261         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2265 
2266         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2294         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2295         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2296         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2297         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2298         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2299         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2300       }
2301     }
2302   }
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2307 {
2308   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2309 
2310   PetscFunctionBegin;
2311   info->block_size        = 1.0;
2312   info->nz_allocated      = mumps->id.INFOG(20);
2313   info->nz_used           = mumps->id.INFOG(20);
2314   info->nz_unneeded       = 0.0;
2315   info->assemblies        = 0.0;
2316   info->mallocs           = 0.0;
2317   info->memory            = 0.0;
2318   info->fill_ratio_given  = 0;
2319   info->fill_ratio_needed = 0;
2320   info->factor_mallocs    = 0;
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 /* -------------------------------------------------------------------------------------------*/
2325 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2326 {
2327   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2328   const PetscScalar *arr;
2329   const PetscInt    *idxs;
2330   PetscInt           size, i;
2331 
2332   PetscFunctionBegin;
2333   PetscCall(ISGetLocalSize(is, &size));
2334   /* Schur complement matrix */
2335   PetscCall(MatDestroy(&F->schur));
2336   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2337   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2338   mumps->id.schur      = (MumpsScalar *)arr;
2339   mumps->id.size_schur = size;
2340   mumps->id.schur_lld  = size;
2341   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2342   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2343 
2344   /* MUMPS expects Fortran style indices */
2345   PetscCall(PetscFree(mumps->id.listvar_schur));
2346   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2347   PetscCall(ISGetIndices(is, &idxs));
2348   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2349   PetscCall(ISRestoreIndices(is, &idxs));
2350   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2351   mumps->id.ICNTL(26) = -1;
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 /* -------------------------------------------------------------------------------------------*/
2356 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2357 {
2358   Mat          St;
2359   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2360   PetscScalar *array;
2361 #if defined(PETSC_USE_COMPLEX)
2362   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2363 #endif
2364 
2365   PetscFunctionBegin;
2366   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2367   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2368   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2369   PetscCall(MatSetType(St, MATDENSE));
2370   PetscCall(MatSetUp(St));
2371   PetscCall(MatDenseGetArray(St, &array));
2372   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2373     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2374       PetscInt i, j, N = mumps->id.size_schur;
2375       for (i = 0; i < N; i++) {
2376         for (j = 0; j < N; j++) {
2377 #if !defined(PETSC_USE_COMPLEX)
2378           PetscScalar val = mumps->id.schur[i * N + j];
2379 #else
2380           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2381 #endif
2382           array[j * N + i] = val;
2383         }
2384       }
2385     } else { /* stored by columns */
2386       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2387     }
2388   } else {                          /* either full or lower-triangular (not packed) */
2389     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2390       PetscInt i, j, N = mumps->id.size_schur;
2391       for (i = 0; i < N; i++) {
2392         for (j = i; j < N; j++) {
2393 #if !defined(PETSC_USE_COMPLEX)
2394           PetscScalar val = mumps->id.schur[i * N + j];
2395 #else
2396           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2397 #endif
2398           array[i * N + j] = val;
2399           array[j * N + i] = val;
2400         }
2401       }
2402     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2403       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2404     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2405       PetscInt i, j, N = mumps->id.size_schur;
2406       for (i = 0; i < N; i++) {
2407         for (j = 0; j < i + 1; j++) {
2408 #if !defined(PETSC_USE_COMPLEX)
2409           PetscScalar val = mumps->id.schur[i * N + j];
2410 #else
2411           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2412 #endif
2413           array[i * N + j] = val;
2414           array[j * N + i] = val;
2415         }
2416       }
2417     }
2418   }
2419   PetscCall(MatDenseRestoreArray(St, &array));
2420   *S = St;
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 /* -------------------------------------------------------------------------------------------*/
2425 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2426 {
2427   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2428 
2429   PetscFunctionBegin;
2430   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
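    /* Cached layout: ICNTL_pre[0] holds the number of cached (icntl, ival) pairs; pair i is stored at ICNTL_pre[1 + 2*i] (the icntl index) and ICNTL_pre[2 + 2*i] (the value) */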
2431     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2432     for (i = 0; i < nICNTL_pre; ++i)
2433       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2434     if (i == nICNTL_pre) {                             /* not already cached */
2435       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2436       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2437       mumps->ICNTL_pre[0]++;
2438     }
2439     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2440     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2441   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2446 {
2447   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2448 
2449   PetscFunctionBegin;
2450   *ival = mumps->id.ICNTL(icntl);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 /*@
2455   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2456 
2457    Logically Collective
2458 
2459    Input Parameters:
2460 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2461 .  icntl - index of MUMPS parameter array ICNTL()
2462 -  ival - value of MUMPS ICNTL(icntl)
2463 
2464   Options Database Key:
2465 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2466 
2467    Level: beginner
2468 
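   Example Usage:
   A minimal sketch (error checking omitted); A is an assumed assembled `MATAIJ` matrix and F the factor obtained from it:
$     MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
$     MatMumpsSetIcntl(F, 14, 30);  /* ICNTL(14): allow a 30 percent increase in the estimated working space */
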
2469    References:
2470 .  * - MUMPS Users' Guide
2471 
2472 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2473 @*/
2474 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2475 {
2476   PetscFunctionBegin;
2477   PetscValidType(F, 1);
2478   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2479   PetscValidLogicalCollectiveInt(F, icntl, 2);
2480   PetscValidLogicalCollectiveInt(F, ival, 3);
2481   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2482   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 /*@
2487   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2488 
2489    Logically Collective
2490 
2491    Input Parameters:
2492 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2493 -  icntl - index of MUMPS parameter array ICNTL()
2494 
2495   Output Parameter:
2496 .  ival - value of MUMPS ICNTL(icntl)
2497 
2498    Level: beginner
2499 
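   Example Usage:
   A minimal sketch reading back the null-pivot detection setting actually in effect (F is an assumed factored matrix, ival a `PetscInt`):
$     MatMumpsGetIcntl(F, 24, &ival);  /* ival holds the current value of ICNTL(24) */
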
2500    References:
2501 .  * - MUMPS Users' Guide
2502 
2503 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2504 @*/
2505 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2506 {
2507   PetscFunctionBegin;
2508   PetscValidType(F, 1);
2509   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2510   PetscValidLogicalCollectiveInt(F, icntl, 2);
2511   PetscValidIntPointer(ival, 3);
2512   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2513   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /* -------------------------------------------------------------------------------------------*/
2518 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2519 {
2520   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2521 
2522   PetscFunctionBegin;
2523   if (mumps->id.job == JOB_NULL) {
2524     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2525     for (i = 0; i < nCNTL_pre; ++i)
2526       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2527     if (i == nCNTL_pre) {
2528       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2529       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2530       mumps->CNTL_pre[0]++;
2531     }
2532     mumps->CNTL_pre[1 + 2 * i] = icntl;
2533     mumps->CNTL_pre[2 + 2 * i] = val;
2534   } else mumps->id.CNTL(icntl) = val;
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2539 {
2540   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2541 
2542   PetscFunctionBegin;
2543   *val = mumps->id.CNTL(icntl);
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 /*@
2548   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2549 
2550    Logically Collective
2551 
2552    Input Parameters:
2553 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2554 .  icntl - index of MUMPS parameter array CNTL()
2555 -  val - value of MUMPS CNTL(icntl)
2556 
2557   Options Database Key:
2558 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to ival
2559 
2560    Level: beginner
2561 
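   Example Usage:
   A minimal sketch (F is an assumed factored matrix obtained with `MATSOLVERMUMPS`):
$     MatMumpsSetCntl(F, 1, 0.1);  /* CNTL(1): relative pivoting threshold */
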
2562    References:
2563 .  * - MUMPS Users' Guide
2564 
2565 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2566 @*/
2567 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2568 {
2569   PetscFunctionBegin;
2570   PetscValidType(F, 1);
2571   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2572   PetscValidLogicalCollectiveInt(F, icntl, 2);
2573   PetscValidLogicalCollectiveReal(F, val, 3);
2574   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2575   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 /*@
2580   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2581 
2582    Logically Collective
2583 
2584    Input Parameters:
2585 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2586 -  icntl - index of MUMPS parameter array CNTL()
2587 
2588   Output Parameter:
2589 .  val - value of MUMPS CNTL(icntl)
2590 
2591    Level: beginner
2592 
2593    References:
2594 .  * - MUMPS Users' Guide
2595 
2596 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2597 @*/
2598 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2599 {
2600   PetscFunctionBegin;
2601   PetscValidType(F, 1);
2602   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2603   PetscValidLogicalCollectiveInt(F, icntl, 2);
2604   PetscValidRealPointer(val, 3);
2605   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2606   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2611 {
2612   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2613 
2614   PetscFunctionBegin;
2615   *info = mumps->id.INFO(icntl);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2620 {
2621   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2622 
2623   PetscFunctionBegin;
2624   *infog = mumps->id.INFOG(icntl);
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2629 {
2630   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2631 
2632   PetscFunctionBegin;
2633   *rinfo = mumps->id.RINFO(icntl);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2638 {
2639   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2640 
2641   PetscFunctionBegin;
2642   *rinfog = mumps->id.RINFOG(icntl);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2647 {
2648   Mat          Bt = NULL, Btseq = NULL;
2649   PetscBool    flg;
2650   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2651   PetscScalar *aa;
2652   PetscInt     spnr, *ia, *ja, M, nrhs;
2653 
2654   PetscFunctionBegin;
2655   PetscValidPointer(spRHS, 2);
2656   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2657   if (flg) {
2658     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2659   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2660 
2661   PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30): compute a user-specified set of entries in inv(A) */
2662 
2663   if (mumps->petsc_size > 1) {
2664     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2665     Btseq         = b->A;
2666   } else {
2667     Btseq = Bt;
2668   }
2669 
2670   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2671   mumps->id.nrhs = nrhs;
2672   mumps->id.lrhs = M;
2673   mumps->id.rhs  = NULL;
2674 
2675   if (!mumps->myid) {
2676     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2677     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2678     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2679     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2680     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2681   } else {
2682     mumps->id.irhs_ptr    = NULL;
2683     mumps->id.irhs_sparse = NULL;
2684     mumps->id.nz_rhs      = 0;
2685     mumps->id.rhs_sparse  = NULL;
2686   }
2687   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2688   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2689 
2690   /* solve phase */
2691   /*-------------*/
2692   mumps->id.job = JOB_SOLVE;
2693   PetscMUMPS_c(mumps);
2694   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2695 
2696   if (!mumps->myid) {
2697     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2698     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2699     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2700   }
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 /*@
2705   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2706 
2707    Logically Collective
2708 
2709    Input Parameters:
2710 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2711 -  spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format holding the specified indices on process 0
2712 
2713   Output Parameter:
2714 . spRHS - requested entries of inverse of A
2715 
2716    Level: beginner
2717 
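   Example Usage:
   A minimal sketch; Bt is an assumed sequential `MATAIJ` matrix held on process 0 whose nonzero pattern selects the requested entries of inv(A):
$     MatCreateTranspose(Bt, &spRHS);  /* wrap Bt as a MATTRANSPOSEVIRTUAL matrix */
$     MatMumpsGetInverse(F, spRHS);    /* the requested entries of inv(A) are returned in Bt */
$     MatDestroy(&spRHS);
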
2718    References:
2719 .  * - MUMPS Users' Guide
2720 
2721 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2722 @*/
2723 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2724 {
2725   PetscFunctionBegin;
2726   PetscValidType(F, 1);
2727   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2728   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2733 {
2734   Mat spRHS;
2735 
2736   PetscFunctionBegin;
2737   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2738   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2739   PetscCall(MatDestroy(&spRHS));
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 /*@
2744   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2745 
2746    Logically Collective
2747 
2748    Input Parameters:
2749 +  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2750 -  spRHST - sequential sparse matrix in `MATAIJ` format holding the specified indices of A^T on process 0
2751 
2752   Output Parameter:
2753 . spRHST - requested entries of inverse of A^T
2754 
2755    Level: beginner
2756 
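   Example Usage:
   A minimal sketch; Bt is an assumed sequential `MATAIJ` matrix held on process 0 whose nonzero pattern selects the requested entries of inv(A^T):
$     MatMumpsGetInverseTranspose(F, Bt);  /* the requested entries of inv(A^T) are returned in Bt */
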
2757    References:
2758 .  * - MUMPS Users' Guide
2759 
2760 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2761 @*/
2762 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2763 {
2764   PetscBool flg;
2765 
2766   PetscFunctionBegin;
2767   PetscValidType(F, 1);
2768   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2769   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2770   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2771 
2772   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2773   PetscFunctionReturn(0);
2774 }
2775 
2776 /*@
2777   MatMumpsGetInfo - Get MUMPS parameter INFO()
2778 
2779    Logically Collective
2780 
2781    Input Parameters:
2782 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2783 -  icntl - index of MUMPS parameter array INFO()
2784 
2785   Output Parameter:
2786 .  ival - value of MUMPS INFO(icntl)
2787 
2788    Level: beginner
2789 
2790    References:
2791 .  * - MUMPS Users' Guide
2792 
2793 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2794 @*/
2795 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2796 {
2797   PetscFunctionBegin;
2798   PetscValidType(F, 1);
2799   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2800   PetscValidIntPointer(ival, 3);
2801   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 /*@
2806   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2807 
2808    Logically Collective
2809 
2810    Input Parameters:
2811 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2812 -  icntl - index of MUMPS parameter array INFOG()
2813 
2814   Output Parameter:
2815 .  ival - value of MUMPS INFOG(icntl)
2816 
2817    Level: beginner
2818 
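   Example Usage:
   A minimal sketch checking the global MUMPS status after a factorization (F is an assumed factored matrix, infog1 a `PetscInt`):
$     MatMumpsGetInfog(F, 1, &infog1);  /* INFOG(1) < 0 indicates an error, > 0 a warning, 0 success */
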
2819    References:
2820 .  * - MUMPS Users' Guide
2821 
2822 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2823 @*/
2824 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2825 {
2826   PetscFunctionBegin;
2827   PetscValidType(F, 1);
2828   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2829   PetscValidIntPointer(ival, 3);
2830   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 /*@
2835   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2836 
2837    Logically Collective
2838 
2839    Input Parameters:
2840 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2841 -  icntl - index of MUMPS parameter array RINFO()
2842 
2843   Output Parameter:
2844 .  val - value of MUMPS RINFO(icntl)
2845 
2846    Level: beginner
2847 
2848    References:
2849 .  * - MUMPS Users' Guide
2850 
2851 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2852 @*/
2853 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2854 {
2855   PetscFunctionBegin;
2856   PetscValidType(F, 1);
2857   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2858   PetscValidRealPointer(val, 3);
2859   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 /*@
2864   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2865 
2866    Logically Collective
2867 
2868    Input Parameters:
2869 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2870 -  icntl - index of MUMPS parameter array RINFOG()
2871 
2872   Output Parameter:
2873 .  val - value of MUMPS RINFOG(icntl)
2874 
2875    Level: beginner
2876 
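   Example Usage:
   A minimal sketch retrieving the determinant once ICNTL(33) has been set before factorization (F is an assumed factored matrix, mantissa a `PetscReal`, exponent a `PetscInt`):
$     MatMumpsGetRinfog(F, 12, &mantissa);  /* real part of the determinant mantissa; RINFOG(13) holds the imaginary part */
$     MatMumpsGetInfog(F, 34, &exponent);   /* determinant = mantissa * 2^exponent */
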
2877    References:
2878 .  * - MUMPS Users' Guide
2879 
2880 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2881 @*/
2882 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2883 {
2884   PetscFunctionBegin;
2885   PetscValidType(F, 1);
2886   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2887   PetscValidRealPointer(val, 3);
2888   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 /*MC
2893   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2894   distributed and sequential matrices via the external package MUMPS.
2895 
2896   Works with `MATAIJ` and `MATSBAIJ` matrices
2897 
2898   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2899 
2900   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2901 
2902   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2903 
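  For example, a minimal sketch of a command line (./ex2 is only a placeholder for an application that sets up a `KSP`):
$     mpiexec -n 4 ./ex2 -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30
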
2904   Options Database Keys:
2905 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2906 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2907 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2908 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2909 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2910 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2911                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2912 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2913 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2914 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2915 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2916 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2917 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2918 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2919 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2920 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2921 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2922 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2923 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2924 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2925 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2926 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2927 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2928 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2929 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2930 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2931 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2932 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2933 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2934 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2935 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion for iterative refinement
2936 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2937 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2938 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2939 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2940 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2941                                    The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2942 
2943   Level: beginner
2944 
2945     Notes:
2946     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so attempting to use it on a Hermitian matrix generates an error.
2947 
2948     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2949     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2950 
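    For example, a minimal sketch (the prefix "sub_" is only a placeholder):
$          MatSetOptionsPrefixFactor(A, "sub_");
    after which the factor obtained from A reads options such as -sub_mat_mumps_icntl_14.
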
2951     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about the failure by calling
2952 $          KSPGetPC(ksp,&pc);
2953 $          PCFactorGetMatrix(pc,&mat);
2954 $          MatMumpsGetInfo(mat,....);
2955 $          MatMumpsGetInfog(mat,....); etc.
2956            Or you can run with -ksp_error_if_not_converged, in which case the program stops and the information is printed in the error message.
2957 
2958   Using MUMPS with 64-bit integers
2959     MUMPS provides 64-bit integer support in two build modes:
2960       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2961       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2962 
2963       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2964       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2965       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2966       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2967 
2968     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used with both the --with-64-bit-indices=0 and =1 variants of PETSc.
2969 
2970   Two modes to run MUMPS/PETSc with OpenMP
2971 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2972 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2973 
2974 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2975 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2976 
2977    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2978    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2979    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2980    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2981    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2982 
2983    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2984    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2985    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2986    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; the slaves are not. We free the CPUs assigned to the slaves (which might have been set
2987    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2988    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose each compute node has two sockets.
2989    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores, then with
2990    -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) uses half of its cores from socket 0 and half from socket 1,
2991    which definitely hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the problem does not arise.
2992    Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2993    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2994    examine the mapping result.
2995 
2996    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2997    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
2998    calls `omp_set_num_threads`(m) internally before calling MUMPS.
2999 
3000    References:
3001 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3002 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3003 
3004 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3005 M*/
3006 
3007 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3008 {
3009   PetscFunctionBegin;
3010   *type = MATSOLVERMUMPS;
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 /* MatGetFactor for Seq and MPI AIJ matrices */
3015 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3016 {
3017   Mat         B;
3018   Mat_MUMPS  *mumps;
3019   PetscBool   isSeqAIJ;
3020   PetscMPIInt size;
3021 
3022   PetscFunctionBegin;
3023 #if defined(PETSC_USE_COMPLEX)
3024   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3025 #endif
3026   /* Create the factorization matrix */
3027   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3028   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3029   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3030   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3031   PetscCall(MatSetUp(B));
3032 
3033   PetscCall(PetscNew(&mumps));
3034 
3035   B->ops->view    = MatView_MUMPS;
3036   B->ops->getinfo = MatGetInfo_MUMPS;
3037 
3038   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3039   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3040   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3041   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3042   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3043   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3044   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3045   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3046   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3047   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3048   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3049   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3050   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3051 
3052   if (ftype == MAT_FACTOR_LU) {
3053     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3054     B->factortype            = MAT_FACTOR_LU;
3055     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3056     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3057     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3058     mumps->sym = 0;
3059   } else {
3060     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3061     B->factortype                  = MAT_FACTOR_CHOLESKY;
3062     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3063     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3064     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3065 #if defined(PETSC_USE_COMPLEX)
3066     mumps->sym = 2;
3067 #else
3068     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3069     else mumps->sym = 2;
3070 #endif
3071   }
3072 
3073   /* set solvertype */
3074   PetscCall(PetscFree(B->solvertype));
3075   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3076   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3077   if (size == 1) {
3078     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3079     B->canuseordering = PETSC_TRUE;
3080   }
3081   B->ops->destroy = MatDestroy_MUMPS;
3082   B->data         = (void *)mumps;
3083 
3084   *F               = B;
3085   mumps->id.job    = JOB_NULL;
3086   mumps->ICNTL_pre = NULL;
3087   mumps->CNTL_pre  = NULL;
3088   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3093 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3094 {
3095   Mat         B;
3096   Mat_MUMPS  *mumps;
3097   PetscBool   isSeqSBAIJ;
3098   PetscMPIInt size;
3099 
3100   PetscFunctionBegin;
3101 #if defined(PETSC_USE_COMPLEX)
3102   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3103 #endif
3104   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3105   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3106   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3107   PetscCall(MatSetUp(B));
3108 
3109   PetscCall(PetscNew(&mumps));
3110   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3111   if (isSeqSBAIJ) {
3112     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3113   } else {
3114     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3115   }
3116 
3117   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3118   B->ops->view                   = MatView_MUMPS;
3119   B->ops->getinfo                = MatGetInfo_MUMPS;
3120 
3121   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3122   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3123   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3124   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3132   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3133   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3134 
3135   B->factortype = MAT_FACTOR_CHOLESKY;
3136 #if defined(PETSC_USE_COMPLEX)
3137   mumps->sym = 2;
3138 #else
3139   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3140   else mumps->sym = 2;
3141 #endif
3142 
3143   /* set solvertype */
3144   PetscCall(PetscFree(B->solvertype));
3145   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3146   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3147   if (size == 1) {
3148     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3149     B->canuseordering = PETSC_TRUE;
3150   }
3151   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3152   B->ops->destroy = MatDestroy_MUMPS;
3153   B->data         = (void *)mumps;
3154 
3155   *F               = B;
3156   mumps->id.job    = JOB_NULL;
3157   mumps->ICNTL_pre = NULL;
3158   mumps->CNTL_pre  = NULL;
3159   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3164 {
3165   Mat         B;
3166   Mat_MUMPS  *mumps;
3167   PetscBool   isSeqBAIJ;
3168   PetscMPIInt size;
3169 
3170   PetscFunctionBegin;
3171   /* Create the factorization matrix */
3172   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3173   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3174   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3175   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3176   PetscCall(MatSetUp(B));
3177 
3178   PetscCall(PetscNew(&mumps));
3179   if (ftype == MAT_FACTOR_LU) {
3180     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3181     B->factortype            = MAT_FACTOR_LU;
3182     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3183     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3184     mumps->sym = 0;
3185     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3186   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3187 
3188   B->ops->view    = MatView_MUMPS;
3189   B->ops->getinfo = MatGetInfo_MUMPS;
3190 
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3204 
3205   /* set solvertype */
3206   PetscCall(PetscFree(B->solvertype));
3207   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3208   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3209   if (size == 1) {
3210     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3211     B->canuseordering = PETSC_TRUE;
3212   }
3213   B->ops->destroy = MatDestroy_MUMPS;
3214   B->data         = (void *)mumps;
3215 
3216   *F               = B;
3217   mumps->id.job    = JOB_NULL;
3218   mumps->ICNTL_pre = NULL;
3219   mumps->CNTL_pre  = NULL;
3220   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 /* MatGetFactor for Seq and MPI SELL matrices */
3225 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3226 {
3227   Mat         B;
3228   Mat_MUMPS  *mumps;
3229   PetscBool   isSeqSELL;
3230   PetscMPIInt size;
3231 
3232   PetscFunctionBegin;
3233   /* Create the factorization matrix */
3234   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3235   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3236   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3237   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3238   PetscCall(MatSetUp(B));
3239 
3240   PetscCall(PetscNew(&mumps));
3241 
3242   B->ops->view    = MatView_MUMPS;
3243   B->ops->getinfo = MatGetInfo_MUMPS;
3244 
3245   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3246   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3247   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3248   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3251   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3255   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3256 
3257   if (ftype == MAT_FACTOR_LU) {
3258     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3259     B->factortype            = MAT_FACTOR_LU;
3260     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3261     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3262     mumps->sym = 0;
3263     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3264   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3265 
3266   /* set solvertype */
3267   PetscCall(PetscFree(B->solvertype));
3268   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3269   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3270   if (size == 1) {
3271     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3272     B->canuseordering = PETSC_TRUE;
3273   }
3274   B->ops->destroy = MatDestroy_MUMPS;
3275   B->data         = (void *)mumps;
3276 
3277   *F               = B;
3278   mumps->id.job    = JOB_NULL;
3279   mumps->ICNTL_pre = NULL;
3280   mumps->CNTL_pre  = NULL;
3281   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3286 {
3287   PetscFunctionBegin;
3288   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3289   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3290   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3291   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3292   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3293   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3294   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3295   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3296   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3297   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3298   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3299   PetscFunctionReturn(0);
3300 }
3301