xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 6d5076fa512d0d8e4e12a3685978afdbfee2b1a4)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12   #if defined(PETSC_USE_REAL_SINGLE)
13     #include <cmumps_c.h>
14   #else
15     #include <zmumps_c.h>
16   #endif
17 #else
18   #if defined(PETSC_USE_REAL_SINGLE)
19     #include <smumps_c.h>
20   #else
21     #include <dmumps_c.h>
22   #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT         -1
26 #define JOB_NULL         0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC  2
29 #define JOB_SOLVE        3
30 #define JOB_END          -2
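/* MUMPS is driven by setting id.job to one of the values above and then calling the library. A
   complete run steps through the jobs in order; a minimal sketch of the pattern used throughout
   this file (JOB_INIT is issued when the factor is created, error checking omitted):

     mumps->id.job = JOB_FACTSYMBOLIC; PetscMUMPS_c(mumps);  -- analysis/ordering
     mumps->id.job = JOB_FACTNUMERIC;  PetscMUMPS_c(mumps);  -- numerical factorization
     mumps->id.job = JOB_SOLVE;        PetscMUMPS_c(mumps);  -- forward/backward substitution
     mumps->id.job = JOB_END;          PetscMUMPS_c(mumps);  -- free MUMPS' internal data

   PetscMUMPS_c() is the wrapper defined below; JOB_NULL marks an instance for which JOB_INIT has
   never been issued (see MatDestroy_MUMPS()). */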
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34   #if defined(PETSC_USE_REAL_SINGLE)
35     #define MUMPS_c cmumps_c
36   #else
37     #define MUMPS_c zmumps_c
38   #endif
39 #else
40   #if defined(PETSC_USE_REAL_SINGLE)
41     #define MUMPS_c smumps_c
42   #else
43     #define MUMPS_c dmumps_c
44   #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48    the number of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention of PetscMPIInt, PetscBLASInt, etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is more reliable */
55     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT       MPI_INT
64 #define PETSC_MUMPS_INT_MAX 2147483647
65 #define PETSC_MUMPS_INT_MIN -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
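/* Illustrative sketch (not part of the library) of how the cast above is used before handing a
   PETSc size or index to MUMPS; with 64-bit PetscInt the assert catches values that do not fit in
   a 32-bit MUMPS_INT:

     PetscMUMPSInt n_mumps;
     PetscCall(PetscMUMPSIntCast(A->rmap->N, &n_mumps)); // errors out if the global size exceeds 2^31-1
     mumps->id.n = n_mumps;                              // id.n is a MUMPS_INT
*/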
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
80 {
81   PetscInt  myval;
82   PetscBool myset;
83   PetscFunctionBegin;
84   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
87   if (set) *set = myset;
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
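/* Usage sketch: the options-processing code later in this file reads ICNTL values with roughly
   this pattern (it must run inside a PetscOptionsBegin()/PetscOptionsEnd() block so that
   PetscOptionsObject is defined):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing", "None", mumps->id.ICNTL(4), &icntl, &flg));
     if (flg) mumps->id.ICNTL(4) = icntl;
*/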
91 
92 /* if using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94   #define PetscMUMPS_c(mumps) \
95     do { \
96       if (mumps->use_petsc_omp_support) { \
97         if (mumps->is_omp_master) { \
98           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99           MUMPS_c(&mumps->id); \
100           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
101         } \
102         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
103         /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
104          to each process, so we only Bcast info[1] (an error code) and leave the others alone (they do not   \
105          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.       \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.   \
107       */ \
108         PetscCallMPI(MPI_Bcast(mumps->id.infog, 40, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
109         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, 20, MPIU_REAL, 0, mumps->omp_comm)); \
110         PetscCallMPI(MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0, mumps->omp_comm)); \
111       } else { \
112         MUMPS_c(&mumps->id); \
113       } \
114     } while (0)
115 #else
116   #define PetscMUMPS_c(mumps) \
117     do { \
118       MUMPS_c(&mumps->id); \
119     } while (0)
120 #endif
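/* Typical invocation pattern used throughout this file: set id.job, call the wrapper, then check
   MUMPS' global error code INFOG(1) (negative values signal an error):

     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));

   With OpenMP support, only the master rank of omp_comm enters MUMPS; the broadcasts above make
   infog, rinfog and info[1] consistent on the other ranks so this check works everywhere. */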
121 
122 /* declare MumpsScalar */
123 #if defined(PETSC_USE_COMPLEX)
124   #if defined(PETSC_USE_REAL_SINGLE)
125     #define MumpsScalar mumps_complex
126   #else
127     #define MumpsScalar mumps_double_complex
128   #endif
129 #else
130   #define MumpsScalar PetscScalar
131 #endif
132 
133 /* macros so that indices match the MUMPS documentation */
134 #define ICNTL(I)  icntl[(I)-1]
135 #define CNTL(I)   cntl[(I)-1]
136 #define INFOG(I)  infog[(I)-1]
137 #define INFO(I)   info[(I)-1]
138 #define RINFOG(I) rinfog[(I)-1]
139 #define RINFO(I)  rinfo[(I)-1]
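/* Example: with the macros above, the 1-based control/statistics numbers from the MUMPS manual can
   be used verbatim, e.g.

     mumps->id.ICNTL(24) = 1;           // expands to mumps->id.icntl[23] = 1
     PetscReal r = mumps->id.RINFOG(3); // expands to mumps->id.rinfog[2]
*/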
140 
141 typedef struct Mat_MUMPS Mat_MUMPS;
142 struct Mat_MUMPS {
143 #if defined(PETSC_USE_COMPLEX)
144   #if defined(PETSC_USE_REAL_SINGLE)
145   CMUMPS_STRUC_C id;
146   #else
147   ZMUMPS_STRUC_C id;
148   #endif
149 #else
150   #if defined(PETSC_USE_REAL_SINGLE)
151   SMUMPS_STRUC_C id;
152   #else
153   DMUMPS_STRUC_C id;
154   #endif
155 #endif
156 
157   MatStructure   matstruc;
158   PetscMPIInt    myid, petsc_size;
159   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
160   PetscScalar   *val, *val_alloc; /* for some matrices we can access their data array directly without a buffer; for others we need one, hence val_alloc */
161   PetscInt64     nnz;             /* number of nonzeros; the type is what MUMPS calls selective 64-bit */
162   PetscMUMPSInt  sym;
163   MPI_Comm       mumps_comm;
164   PetscMUMPSInt *ICNTL_pre;
165   PetscReal     *CNTL_pre;
166   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
167   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
168   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
169   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
170 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
171   PetscInt    *rhs_nrow, max_nrhs;
172   PetscMPIInt *rhs_recvcounts, *rhs_disps;
173   PetscScalar *rhs_loc, *rhs_recvbuf;
174 #endif
175   Vec            b_seq, x_seq;
176   PetscInt       ninfo, *info; /* which INFO to display */
177   PetscInt       sizeredrhs;
178   PetscScalar   *schur_sol;
179   PetscInt       schur_sizesol;
180   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
181   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
182   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
183 
184   /* stuff used by petsc/mumps OpenMP support*/
185   PetscBool    use_petsc_omp_support;
186   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller guaranteeing that blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
187   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's communicator */
188   PetscInt64  *recvcount;            /* the nnz values gathered on omp_master */
189   PetscMPIInt  tag, omp_comm_size;
190   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
191   MPI_Request *reqs;
192 };
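/* The ConvertToTriples member above is set per matrix type when the factor is created, and the
   symbolic/numeric factorization invokes it with a Fortran-style shift of 1, roughly as:

     PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); // fills mumps->irn/jcn/val and nnz
*/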
193 
194 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
195    Here, nrow is the number of rows, ia[] is the row pointer, and ja[] holds the column indices.
196  */
197 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
198 {
199   PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the RHS */
200 
201   PetscFunctionBegin;
202 #if defined(PETSC_USE_64BIT_INDICES)
203   {
204     PetscInt i;
205     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
206       PetscCall(PetscFree(mumps->ia_alloc));
207       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
208       mumps->cur_ilen = nrow + 1;
209     }
210     if (nnz > mumps->cur_jlen) {
211       PetscCall(PetscFree(mumps->ja_alloc));
212       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
213       mumps->cur_jlen = nnz;
214     }
215     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
216     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
217     *ia_mumps = mumps->ia_alloc;
218     *ja_mumps = mumps->ja_alloc;
219   }
220 #else
221   *ia_mumps          = ia;
222   *ja_mumps          = ja;
223 #endif
224   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
225   PetscFunctionReturn(PETSC_SUCCESS);
226 }
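/* Usage sketch mirroring the sparse-RHS path in MatMatSolve_MUMPS() below: a 1-based CSR obtained
   from MatGetRowIJ() is cast before being passed to MUMPS as irhs_ptr/irhs_sparse/nz_rhs:

     PetscInt  spnr, *ia, *ja;
     PetscBool done;
     PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &done));
     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
*/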
227 
228 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
229 {
230   PetscFunctionBegin;
231   PetscCall(PetscFree(mumps->id.listvar_schur));
232   PetscCall(PetscFree(mumps->id.redrhs));
233   PetscCall(PetscFree(mumps->schur_sol));
234   mumps->id.size_schur = 0;
235   mumps->id.schur_lld  = 0;
236   mumps->id.ICNTL(19)  = 0;
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
240 /* solve with the RHS in mumps->id.redrhs and return the solution in the same location */
241 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
242 {
243   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
244   Mat                  S, B, X;
245   MatFactorSchurStatus schurstatus;
246   PetscInt             sizesol;
247 
248   PetscFunctionBegin;
249   PetscCall(MatFactorFactorizeSchurComplement(F));
250   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
251   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
252   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
254   PetscCall(MatBindToCPU(B, S->boundtocpu));
255 #endif
256   switch (schurstatus) {
257   case MAT_FACTOR_SCHUR_FACTORED:
258     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
259     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261     PetscCall(MatBindToCPU(X, S->boundtocpu));
262 #endif
263     if (!mumps->id.ICNTL(9)) { /* transpose solve */
264       PetscCall(MatMatSolveTranspose(S, B, X));
265     } else {
266       PetscCall(MatMatSolve(S, B, X));
267     }
268     break;
269   case MAT_FACTOR_SCHUR_INVERTED:
270     sizesol = mumps->id.nrhs * mumps->id.size_schur;
271     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
272       PetscCall(PetscFree(mumps->schur_sol));
273       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
274       mumps->schur_sizesol = sizesol;
275     }
276     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
277     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
279     PetscCall(MatBindToCPU(X, S->boundtocpu));
280 #endif
281     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
282     if (!mumps->id.ICNTL(9)) { /* transpose solve */
283       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
284     } else {
285       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
286     }
287     PetscCall(MatProductSetFromOptions(X));
288     PetscCall(MatProductSymbolic(X));
289     PetscCall(MatProductNumeric(X));
290 
291     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
292     break;
293   default:
294     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
295   }
296   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
297   PetscCall(MatDestroy(&B));
298   PetscCall(MatDestroy(&X));
299   PetscFunctionReturn(PETSC_SUCCESS);
300 }
301 
302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
303 {
304   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
305 
306   PetscFunctionBegin;
307   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
308     PetscFunctionReturn(PETSC_SUCCESS);
309   }
310   if (!expansion) { /* prepare for the condensation step */
311     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
312     /* allocate MUMPS internal array to store reduced right-hand sides */
313     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
314       PetscCall(PetscFree(mumps->id.redrhs));
315       mumps->id.lredrhs = mumps->id.size_schur;
316       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
317       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
318     }
319     mumps->id.ICNTL(26) = 1; /* condensation phase */
320   } else {                   /* prepare for the expansion step */
321     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
322     PetscCall(MatMumpsSolveSchur_Private(F));
323     mumps->id.ICNTL(26) = 2; /* expansion phase */
324     PetscMUMPS_c(mumps);
325     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
326     /* restore defaults */
327     mumps->id.ICNTL(26) = -1;
328     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
329     if (mumps->id.nrhs > 1) {
330       PetscCall(PetscFree(mumps->id.redrhs));
331       mumps->id.lredrhs = 0;
332       mumps->sizeredrhs = 0;
333     }
334   }
335   PetscFunctionReturn(PETSC_SUCCESS);
336 }
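/* How the two helpers above are combined by the solve routines below (sketch): condensation is
   requested before the MUMPS solve and expansion is performed afterwards:

     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_FALSE)); // condensation: ICNTL(26)=1, allocate id.redrhs
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);                                    // MUMPS leaves the reduced RHS in id.redrhs
     PetscCall(MatMumpsHandleSchur_Private(F, PETSC_TRUE));  // solve the Schur system, then expansion: ICNTL(26)=2
*/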
337 
338 /*
339   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
340 
341   input:
342     A       - matrix in AIJ, BAIJ, or SBAIJ format
343     shift   - 0: C-style (0-based) output triples; 1: Fortran-style (1-based) output triples.
344     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
345               MAT_REUSE_MATRIX:   only the values in the v array are updated
346   output:
347     nnz     - dimension of r, c, and v (number of local nonzero entries of A)
348     r, c, v - row and column indices and matrix values (the matrix triples)
349 
350   The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc(). In MatDestroy_MUMPS() they are then
351   freed with PetscFree(mumps->irn). This is not ideal: the fact that v is ONLY sometimes part of mumps->irn means
352   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353 
354  */
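/* Example of the triplet layout (shift = 1, Fortran-style indexing): the 2x2 AIJ matrix
     [ 2  -1 ]
     [ 0   3 ]
   has nnz = 3 and is converted to
     r = {1, 1, 2},  c = {1, 2, 2},  v = {2.0, -1.0, 3.0}
   i.e. the coordinate (COO) format MUMPS expects in irn/jcn and the value array. */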
355 
356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
357 {
358   const PetscScalar *av;
359   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
360   PetscInt64         nz, rnz, i, j, k;
361   PetscMUMPSInt     *row, *col;
362   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
363 
364   PetscFunctionBegin;
365   PetscCall(MatSeqAIJGetArrayRead(A, &av));
366   mumps->val = (PetscScalar *)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz = aa->nz;
369     ai = aa->i;
370     aj = aa->j;
371     PetscCall(PetscMalloc2(nz, &row, nz, &col));
372     for (i = k = 0; i < M; i++) {
373       rnz = ai[i + 1] - ai[i];
374       ajj = aj + ai[i];
375       for (j = 0; j < rnz; j++) {
376         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
377         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
386   PetscFunctionReturn(PETSC_SUCCESS);
387 }
388 
389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
390 {
391   PetscInt64     nz, i, j, k, r;
392   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
393   PetscMUMPSInt *row, *col;
394 
395   PetscFunctionBegin;
396   mumps->val = a->val;
397   if (reuse == MAT_INITIAL_MATRIX) {
398     nz = a->sliidx[a->totalslices];
399     PetscCall(PetscMalloc2(nz, &row, nz, &col));
400     for (i = k = 0; i < a->totalslices; i++) {
401       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
402     }
403     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
404     mumps->irn = row;
405     mumps->jcn = col;
406     mumps->nnz = nz;
407   }
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410 
411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
412 {
413   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
414   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
415   PetscInt64      M, nz, idx = 0, rnz, i, j, k, m;
416   PetscInt        bs;
417   PetscMUMPSInt  *row, *col;
418 
419   PetscFunctionBegin;
420   PetscCall(MatGetBlockSize(A, &bs));
421   M          = A->rmap->N / bs;
422   mumps->val = aa->a;
423   if (reuse == MAT_INITIAL_MATRIX) {
424     ai = aa->i;
425     aj = aa->j;
426     nz = bs2 * aa->nz;
427     PetscCall(PetscMalloc2(nz, &row, nz, &col));
428     for (i = 0; i < M; i++) {
429       ajj = aj + ai[i];
430       rnz = ai[i + 1] - ai[i];
431       for (k = 0; k < rnz; k++) {
432         for (j = 0; j < bs; j++) {
433           for (m = 0; m < bs; m++) {
434             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
435             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
436             idx++;
437           }
438         }
439       }
440     }
441     mumps->irn = row;
442     mumps->jcn = col;
443     mumps->nnz = nz;
444   }
445   PetscFunctionReturn(PETSC_SUCCESS);
446 }
447 
448 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
449 {
450   const PetscInt *ai, *aj, *ajj;
451   PetscInt        bs;
452   PetscInt64      nz, rnz, i, j, k, m;
453   PetscMUMPSInt  *row, *col;
454   PetscScalar    *val;
455   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
456   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
457 #if defined(PETSC_USE_COMPLEX)
458   PetscBool isset, hermitian;
459 #endif
460 
461   PetscFunctionBegin;
462 #if defined(PETSC_USE_COMPLEX)
463   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
464   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
465 #endif
466   ai = aa->i;
467   aj = aa->j;
468   PetscCall(MatGetBlockSize(A, &bs));
469   if (reuse == MAT_INITIAL_MATRIX) {
470     const PetscInt64 alloc_size = aa->nz * bs2;
471 
472     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
473     if (bs > 1) {
474       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
475       mumps->val = mumps->val_alloc;
476     } else {
477       mumps->val = aa->a;
478     }
479     mumps->irn = row;
480     mumps->jcn = col;
481   } else {
482     if (bs == 1) mumps->val = aa->a;
483     row = mumps->irn;
484     col = mumps->jcn;
485   }
486   val = mumps->val;
487 
488   nz = 0;
489   if (bs > 1) {
490     for (i = 0; i < mbs; i++) {
491       rnz = ai[i + 1] - ai[i];
492       ajj = aj + ai[i];
493       for (j = 0; j < rnz; j++) {
494         for (k = 0; k < bs; k++) {
495           for (m = 0; m < bs; m++) {
496             if (ajj[j] > i || k >= m) {
497               if (reuse == MAT_INITIAL_MATRIX) {
498                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
499                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
500               }
501               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
502             }
503           }
504         }
505       }
506     }
507   } else if (reuse == MAT_INITIAL_MATRIX) {
508     for (i = 0; i < mbs; i++) {
509       rnz = ai[i + 1] - ai[i];
510       ajj = aj + ai[i];
511       for (j = 0; j < rnz; j++) {
512         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
513         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
514         nz++;
515       }
516     }
517     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
518   }
519   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
523 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
524 {
525   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
526   PetscInt64         nz, rnz, i, j;
527   const PetscScalar *av, *v1;
528   PetscScalar       *val;
529   PetscMUMPSInt     *row, *col;
530   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
531   PetscBool          missing;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscBool hermitian, isset;
534 #endif
535 
536   PetscFunctionBegin;
537 #if defined(PETSC_USE_COMPLEX)
538   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
539   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
540 #endif
541   PetscCall(MatSeqAIJGetArrayRead(A, &av));
542   ai    = aa->i;
543   aj    = aa->j;
544   adiag = aa->diag;
545   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
546   if (reuse == MAT_INITIAL_MATRIX) {
547     /* count nz in the upper triangular part of A */
548     nz = 0;
549     if (missing) {
550       for (i = 0; i < M; i++) {
551         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
552           for (j = ai[i]; j < ai[i + 1]; j++) {
553             if (aj[j] < i) continue;
554             nz++;
555           }
556         } else {
557           nz += ai[i + 1] - adiag[i];
558         }
559       }
560     } else {
561       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
562     }
563     PetscCall(PetscMalloc2(nz, &row, nz, &col));
564     PetscCall(PetscMalloc1(nz, &val));
565     mumps->nnz = nz;
566     mumps->irn = row;
567     mumps->jcn = col;
568     mumps->val = mumps->val_alloc = val;
569 
570     nz = 0;
571     if (missing) {
572       for (i = 0; i < M; i++) {
573         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
574           for (j = ai[i]; j < ai[i + 1]; j++) {
575             if (aj[j] < i) continue;
576             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
577             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
578             val[nz] = av[j];
579             nz++;
580           }
581         } else {
582           rnz = ai[i + 1] - adiag[i];
583           ajj = aj + adiag[i];
584           v1  = av + adiag[i];
585           for (j = 0; j < rnz; j++) {
586             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
587             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
588             val[nz++] = v1[j];
589           }
590         }
591       }
592     } else {
593       for (i = 0; i < M; i++) {
594         rnz = ai[i + 1] - adiag[i];
595         ajj = aj + adiag[i];
596         v1  = av + adiag[i];
597         for (j = 0; j < rnz; j++) {
598           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
599           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
600           val[nz++] = v1[j];
601         }
602       }
603     }
604   } else {
605     nz  = 0;
606     val = mumps->val;
607     if (missing) {
608       for (i = 0; i < M; i++) {
609         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
610           for (j = ai[i]; j < ai[i + 1]; j++) {
611             if (aj[j] < i) continue;
612             val[nz++] = av[j];
613           }
614         } else {
615           rnz = ai[i + 1] - adiag[i];
616           v1  = av + adiag[i];
617           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
618         }
619       }
620     } else {
621       for (i = 0; i < M; i++) {
622         rnz = ai[i + 1] - adiag[i];
623         v1  = av + adiag[i];
624         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
625       }
626     }
627   }
628   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
629   PetscFunctionReturn(PETSC_SUCCESS);
630 }
631 
632 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
633 {
634   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
635   PetscInt           bs;
636   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
637   PetscMUMPSInt     *row, *col;
638   const PetscScalar *av, *bv, *v1, *v2;
639   PetscScalar       *val;
640   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
641   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
642   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
643   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
644 #if defined(PETSC_USE_COMPLEX)
645   PetscBool hermitian, isset;
646 #endif
647 
648   PetscFunctionBegin;
649 #if defined(PETSC_USE_COMPLEX)
650   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
651   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
652 #endif
653   PetscCall(MatGetBlockSize(A, &bs));
654   rstart = A->rmap->rstart;
655   ai     = aa->i;
656   aj     = aa->j;
657   bi     = bb->i;
658   bj     = bb->j;
659   av     = aa->a;
660   bv     = bb->a;
661 
662   garray = mat->garray;
663 
664   if (reuse == MAT_INITIAL_MATRIX) {
665     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
666     PetscCall(PetscMalloc2(nz, &row, nz, &col));
667     PetscCall(PetscMalloc1(nz, &val));
668     /* cannot determine the exact mumps->nnz yet because of the SBAIJ storage */
669     mumps->irn = row;
670     mumps->jcn = col;
671     mumps->val = mumps->val_alloc = val;
672   } else {
673     val = mumps->val;
674   }
675 
676   jj   = 0;
677   irow = rstart;
678   for (i = 0; i < mbs; i++) {
679     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
680     countA = ai[i + 1] - ai[i];
681     countB = bi[i + 1] - bi[i];
682     bjj    = bj + bi[i];
683     v1     = av + ai[i] * bs2;
684     v2     = bv + bi[i] * bs2;
685 
686     if (bs > 1) {
687       /* A-part */
688       for (j = 0; j < countA; j++) {
689         for (k = 0; k < bs; k++) {
690           for (m = 0; m < bs; m++) {
691             if (rstart + ajj[j] * bs > irow || k >= m) {
692               if (reuse == MAT_INITIAL_MATRIX) {
693                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
694                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
695               }
696               val[jj++] = v1[j * bs2 + m + k * bs];
697             }
698           }
699         }
700       }
701 
702       /* B-part */
703       for (j = 0; j < countB; j++) {
704         for (k = 0; k < bs; k++) {
705           for (m = 0; m < bs; m++) {
706             if (reuse == MAT_INITIAL_MATRIX) {
707               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
708               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
709             }
710             val[jj++] = v2[j * bs2 + m + k * bs];
711           }
712         }
713       }
714     } else {
715       /* A-part */
716       for (j = 0; j < countA; j++) {
717         if (reuse == MAT_INITIAL_MATRIX) {
718           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
719           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
720         }
721         val[jj++] = v1[j];
722       }
723 
724       /* B-part */
725       for (j = 0; j < countB; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
728           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
729         }
730         val[jj++] = v2[j];
731       }
732     }
733     irow += bs;
734   }
735   mumps->nnz = jj;
736   PetscFunctionReturn(PETSC_SUCCESS);
737 }
738 
739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
740 {
741   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
742   PetscInt64         rstart, nz, i, j, jj, irow, countA, countB;
743   PetscMUMPSInt     *row, *col;
744   const PetscScalar *av, *bv, *v1, *v2;
745   PetscScalar       *val;
746   Mat                Ad, Ao;
747   Mat_SeqAIJ        *aa;
748   Mat_SeqAIJ        *bb;
749 
750   PetscFunctionBegin;
751   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
752   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
753   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
754 
755   aa = (Mat_SeqAIJ *)(Ad)->data;
756   bb = (Mat_SeqAIJ *)(Ao)->data;
757   ai = aa->i;
758   aj = aa->j;
759   bi = bb->i;
760   bj = bb->j;
761 
762   rstart = A->rmap->rstart;
763 
764   if (reuse == MAT_INITIAL_MATRIX) {
765     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
766     PetscCall(PetscMalloc2(nz, &row, nz, &col));
767     PetscCall(PetscMalloc1(nz, &val));
768     mumps->nnz = nz;
769     mumps->irn = row;
770     mumps->jcn = col;
771     mumps->val = mumps->val_alloc = val;
772   } else {
773     val = mumps->val;
774   }
775 
776   jj   = 0;
777   irow = rstart;
778   for (i = 0; i < m; i++) {
779     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
780     countA = ai[i + 1] - ai[i];
781     countB = bi[i + 1] - bi[i];
782     bjj    = bj + bi[i];
783     v1     = av + ai[i];
784     v2     = bv + bi[i];
785 
786     /* A-part */
787     for (j = 0; j < countA; j++) {
788       if (reuse == MAT_INITIAL_MATRIX) {
789         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
790         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
791       }
792       val[jj++] = v1[j];
793     }
794 
795     /* B-part */
796     for (j = 0; j < countB; j++) {
797       if (reuse == MAT_INITIAL_MATRIX) {
798         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
799         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
800       }
801       val[jj++] = v2[j];
802     }
803     irow++;
804   }
805   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
806   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
807   PetscFunctionReturn(PETSC_SUCCESS);
808 }
809 
810 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
811 {
812   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
813   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
814   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
815   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
816   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart;
817   const PetscInt     bs2 = mat->bs2;
818   PetscInt           bs;
819   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
820   PetscMUMPSInt     *row, *col;
821   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
822   PetscScalar       *val;
823 
824   PetscFunctionBegin;
825   PetscCall(MatGetBlockSize(A, &bs));
826   if (reuse == MAT_INITIAL_MATRIX) {
827     nz = bs2 * (aa->nz + bb->nz);
828     PetscCall(PetscMalloc2(nz, &row, nz, &col));
829     PetscCall(PetscMalloc1(nz, &val));
830     mumps->nnz = nz;
831     mumps->irn = row;
832     mumps->jcn = col;
833     mumps->val = mumps->val_alloc = val;
834   } else {
835     val = mumps->val;
836   }
837 
838   jj   = 0;
839   irow = rstart;
840   for (i = 0; i < mbs; i++) {
841     countA = ai[i + 1] - ai[i];
842     countB = bi[i + 1] - bi[i];
843     ajj    = aj + ai[i];
844     bjj    = bj + bi[i];
845     v1     = av + bs2 * ai[i];
846     v2     = bv + bs2 * bi[i];
847 
848     idx = 0;
849     /* A-part */
850     for (k = 0; k < countA; k++) {
851       for (j = 0; j < bs; j++) {
852         for (n = 0; n < bs; n++) {
853           if (reuse == MAT_INITIAL_MATRIX) {
854             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
855             PetscCall(PetscMUMPSIntCast(rstart + bs * ajj[k] + j + shift, &col[jj]));
856           }
857           val[jj++] = v1[idx++];
858         }
859       }
860     }
861 
862     idx = 0;
863     /* B-part */
864     for (k = 0; k < countB; k++) {
865       for (j = 0; j < bs; j++) {
866         for (n = 0; n < bs; n++) {
867           if (reuse == MAT_INITIAL_MATRIX) {
868             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
869             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
870           }
871           val[jj++] = v2[idx++];
872         }
873       }
874     }
875     irow += bs;
876   }
877   PetscFunctionReturn(PETSC_SUCCESS);
878 }
879 
880 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
881 {
882   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
883   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
884   PetscMUMPSInt     *row, *col;
885   const PetscScalar *av, *bv, *v1, *v2;
886   PetscScalar       *val;
887   Mat                Ad, Ao;
888   Mat_SeqAIJ        *aa;
889   Mat_SeqAIJ        *bb;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool hermitian, isset;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
898 #endif
899   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
900   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
901   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
902 
903   aa    = (Mat_SeqAIJ *)(Ad)->data;
904   bb    = (Mat_SeqAIJ *)(Ao)->data;
905   ai    = aa->i;
906   aj    = aa->j;
907   adiag = aa->diag;
908   bi    = bb->i;
909   bj    = bb->j;
910 
911   rstart = A->rmap->rstart;
912 
913   if (reuse == MAT_INITIAL_MATRIX) {
914     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
915     nzb = 0; /* num of upper triangular entries in mat->B */
916     for (i = 0; i < m; i++) {
917       nza += (ai[i + 1] - adiag[i]);
918       countB = bi[i + 1] - bi[i];
919       bjj    = bj + bi[i];
920       for (j = 0; j < countB; j++) {
921         if (garray[bjj[j]] > rstart) nzb++;
922       }
923     }
924 
925     nz = nza + nzb; /* total nz of upper triangular part of mat */
926     PetscCall(PetscMalloc2(nz, &row, nz, &col));
927     PetscCall(PetscMalloc1(nz, &val));
928     mumps->nnz = nz;
929     mumps->irn = row;
930     mumps->jcn = col;
931     mumps->val = mumps->val_alloc = val;
932   } else {
933     val = mumps->val;
934   }
935 
936   jj   = 0;
937   irow = rstart;
938   for (i = 0; i < m; i++) {
939     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
940     v1     = av + adiag[i];
941     countA = ai[i + 1] - adiag[i];
942     countB = bi[i + 1] - bi[i];
943     bjj    = bj + bi[i];
944     v2     = bv + bi[i];
945 
946     /* A-part */
947     for (j = 0; j < countA; j++) {
948       if (reuse == MAT_INITIAL_MATRIX) {
949         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
950         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
951       }
952       val[jj++] = v1[j];
953     }
954 
955     /* B-part */
956     for (j = 0; j < countB; j++) {
957       if (garray[bjj[j]] > rstart) {
958         if (reuse == MAT_INITIAL_MATRIX) {
959           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
960           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
961         }
962         val[jj++] = v2[j];
963       }
964     }
965     irow++;
966   }
967   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
968   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
969   PetscFunctionReturn(PETSC_SUCCESS);
970 }
971 
972 PetscErrorCode MatDestroy_MUMPS(Mat A)
973 {
974   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
975 
976   PetscFunctionBegin;
977   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
978   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
979   PetscCall(VecScatterDestroy(&mumps->scat_sol));
980   PetscCall(VecDestroy(&mumps->b_seq));
981   PetscCall(VecDestroy(&mumps->x_seq));
982   PetscCall(PetscFree(mumps->id.perm_in));
983   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
984   PetscCall(PetscFree(mumps->val_alloc));
985   PetscCall(PetscFree(mumps->info));
986   PetscCall(PetscFree(mumps->ICNTL_pre));
987   PetscCall(PetscFree(mumps->CNTL_pre));
988   PetscCall(MatMumpsResetSchur_Private(mumps));
989   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
990     mumps->id.job = JOB_END;
991     PetscMUMPS_c(mumps);
992     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
993     if (mumps->mumps_comm != MPI_COMM_NULL) {
994       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
995       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
996     }
997   }
998 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
999   if (mumps->use_petsc_omp_support) {
1000     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1001     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1002     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1003   }
1004 #endif
1005   PetscCall(PetscFree(mumps->ia_alloc));
1006   PetscCall(PetscFree(mumps->ja_alloc));
1007   PetscCall(PetscFree(mumps->recvcount));
1008   PetscCall(PetscFree(mumps->reqs));
1009   PetscCall(PetscFree(mumps->irhs_loc));
1010   PetscCall(PetscFree(A->data));
1011 
1012   /* clear composed functions */
1013   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1014   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1015   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1016   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1023   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1024   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1025   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1026   PetscFunctionReturn(PETSC_SUCCESS);
1027 }
1028 
1029 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
1030 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1031 {
1032   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1033   const PetscMPIInt ompsize = mumps->omp_comm_size;
1034   PetscInt          i, m, M, rstart;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(MatGetSize(A, &M, NULL));
1038   PetscCall(MatGetLocalSize(A, &m, NULL));
1039   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1040   if (ompsize == 1) {
1041     if (!mumps->irhs_loc) {
1042       mumps->nloc_rhs = m;
1043       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1044       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1045       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1046     }
1047     mumps->id.rhs_loc = (MumpsScalar *)array;
1048   } else {
1049 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1050     const PetscInt *ranges;
1051     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1052     MPI_Group       petsc_group, omp_group;
1053     PetscScalar    *recvbuf = NULL;
1054 
1055     if (mumps->is_omp_master) {
1056       /* Lazily initialize the omp stuff for distributed rhs */
1057       if (!mumps->irhs_loc) {
1058         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1059         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1060         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1061         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1062         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1063         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1064 
1065         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1066         mumps->nloc_rhs = 0;
1067         PetscCall(MatGetOwnershipRanges(A, &ranges));
1068         for (j = 0; j < ompsize; j++) {
1069           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1070           mumps->nloc_rhs += mumps->rhs_nrow[j];
1071         }
1072         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1073         for (j = k = 0; j < ompsize; j++) {
1074           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1075         }
1076 
1077         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1078         PetscCallMPI(MPI_Group_free(&petsc_group));
1079         PetscCallMPI(MPI_Group_free(&omp_group));
1080       }
1081 
1082       /* Reallocate buffers when the current nrhs is bigger than any previously encountered */
1083       if (nrhs > mumps->max_nrhs) {
1084         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1085         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1086         mumps->max_nrhs = nrhs;
1087       }
1088 
1089       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1090       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1091       mumps->rhs_disps[0] = 0;
1092       for (j = 1; j < ompsize; j++) {
1093         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1094         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1095       }
1096       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1097     }
1098 
1099     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1100     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1101 
1102     if (mumps->is_omp_master) {
1103       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1104         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1105         for (j = 0; j < ompsize; j++) {
1106           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1107           dst                    = dstbase;
1108           for (i = 0; i < nrhs; i++) {
1109             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1110             src += mumps->rhs_nrow[j];
1111             dst += mumps->nloc_rhs;
1112           }
1113           dstbase += mumps->rhs_nrow[j];
1114         }
1115       }
1116       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1117     }
1118 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1119   }
1120   mumps->id.nrhs     = nrhs;
1121   mumps->id.nloc_rhs = mumps->nloc_rhs;
1122   mumps->id.lrhs_loc = mumps->nloc_rhs;
1123   mumps->id.irhs_loc = mumps->irhs_loc;
1124   PetscFunctionReturn(PETSC_SUCCESS);
1125 }
1126 
1127 PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1128 {
1129   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1130   const PetscScalar *rarray = NULL;
1131   PetscScalar       *array;
1132   IS                 is_iden, is_petsc;
1133   PetscInt           i;
1134   PetscBool          second_solve = PETSC_FALSE;
1135   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1136 
1137   PetscFunctionBegin;
1138   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1139                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1140                                    &cite1));
1141   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1142                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1143                                    &cite2));
1144 
1145   if (A->factorerrortype) {
1146     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1147     PetscCall(VecSetInf(x));
1148     PetscFunctionReturn(PETSC_SUCCESS);
1149   }
1150 
1151   mumps->id.nrhs = 1;
1152   if (mumps->petsc_size > 1) {
1153     if (mumps->ICNTL20 == 10) {
1154       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1155       PetscCall(VecGetArrayRead(b, &rarray));
1156       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1157     } else {
1158       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1159       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1160       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1161       if (!mumps->myid) {
1162         PetscCall(VecGetArray(mumps->b_seq, &array));
1163         mumps->id.rhs = (MumpsScalar *)array;
1164       }
1165     }
1166   } else {                   /* petsc_size == 1 */
1167     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1168     PetscCall(VecCopy(b, x));
1169     PetscCall(VecGetArray(x, &array));
1170     mumps->id.rhs = (MumpsScalar *)array;
1171   }
1172 
1173   /*
1174      Handle the condensation step of the Schur complement (if any).
1175      By default we set ICNTL(26) == -1 when Schur indices have been provided by the user;
1176      according to the MUMPS (5.0.0) manual, any other value could be harmful during the factorization phase.
1177      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1178      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1179   */
1180   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1181     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1182     second_solve = PETSC_TRUE;
1183     PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1184   }
1185   /* solve phase */
1186   mumps->id.job = JOB_SOLVE;
1187   PetscMUMPS_c(mumps);
1188   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1189 
1190   /* handle expansion step of Schur complement (if any) */
1191   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1192 
1193   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1194     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1195       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1196       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1197     }
1198     if (!mumps->scat_sol) { /* create scatter scat_sol */
1199       PetscInt *isol2_loc = NULL;
1200       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1201       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1202       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1203       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1204       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1205       PetscCall(ISDestroy(&is_iden));
1206       PetscCall(ISDestroy(&is_petsc));
1207       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1208     }
1209 
1210     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1211     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1212   }
1213 
1214   if (mumps->petsc_size > 1) {
1215     if (mumps->ICNTL20 == 10) {
1216       PetscCall(VecRestoreArrayRead(b, &rarray));
1217     } else if (!mumps->myid) {
1218       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1219     }
1220   } else PetscCall(VecRestoreArray(x, &array));
1221 
1222   PetscCall(PetscLogFlops(2.0 * mumps->id.RINFO(3)));
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1227 {
1228   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1229 
1230   PetscFunctionBegin;
1231   mumps->id.ICNTL(9) = 0;
1232   PetscCall(MatSolve_MUMPS(A, b, x));
1233   mumps->id.ICNTL(9) = 1;
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236 
1237 PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1238 {
1239   Mat                Bt = NULL;
1240   PetscBool          denseX, denseB, flg, flgT;
1241   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1242   PetscInt           i, nrhs, M;
1243   PetscScalar       *array;
1244   const PetscScalar *rbray;
1245   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1246   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1247   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1248   IS                 is_to, is_from;
1249   PetscInt           k, proc, j, m, myrstart;
1250   const PetscInt    *rstart;
1251   Vec                v_mpi, msol_loc;
1252   VecScatter         scat_sol;
1253   Vec                b_seq;
1254   VecScatter         scat_rhs;
1255   PetscScalar       *aa;
1256   PetscInt           spnr, *ia, *ja;
1257   Mat_MPIAIJ        *b = NULL;
1258 
1259   PetscFunctionBegin;
1260   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1261   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1262 
1263   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1264   if (denseB) {
1265     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1266     mumps->id.ICNTL(20) = 0; /* dense RHS */
1267   } else {                   /* sparse B */
1268     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1269     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1270     if (flgT) { /* input B is transpose of actual RHS matrix,
1271                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1272       PetscCall(MatTransposeGetMat(B, &Bt));
1273     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1274     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1275   }
1276 
1277   PetscCall(MatGetSize(B, &M, &nrhs));
1278   mumps->id.nrhs = nrhs;
1279   mumps->id.lrhs = M;
1280   mumps->id.rhs  = NULL;
1281 
1282   if (mumps->petsc_size == 1) {
1283     PetscScalar *aa;
1284     PetscInt     spnr, *ia, *ja;
1285     PetscBool    second_solve = PETSC_FALSE;
1286 
1287     PetscCall(MatDenseGetArray(X, &array));
1288     mumps->id.rhs = (MumpsScalar *)array;
1289 
1290     if (denseB) {
1291       /* copy B to X */
1292       PetscCall(MatDenseGetArrayRead(B, &rbray));
1293       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1294       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1295     } else { /* sparse B */
1296       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1297       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1298       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1299       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1300       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1301     }
1302     /* handle condensation step of Schur complement (if any) */
1303     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1304       second_solve = PETSC_TRUE;
1305       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1306     }
1307     /* solve phase */
1308     mumps->id.job = JOB_SOLVE;
1309     PetscMUMPS_c(mumps);
1310     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1311 
1312     /* handle expansion step of Schur complement (if any) */
1313     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1314     if (!denseB) { /* sparse B */
1315       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1316       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1317       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1318     }
1319     PetscCall(MatDenseRestoreArray(X, &array));
1320     PetscFunctionReturn(PETSC_SUCCESS);
1321   }
1322 
1323   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1324   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1325 
1326   /* create msol_loc to hold mumps local solution */
1327   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1328   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1329 
1330   lsol_loc  = mumps->id.lsol_loc;
1331   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1332   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1333   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1334   mumps->id.isol_loc = isol_loc;
1335 
1336   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1337 
1338   if (denseB) {
1339     if (mumps->ICNTL20 == 10) {
1340       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1341       PetscCall(MatDenseGetArrayRead(B, &rbray));
1342       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1343       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1344       PetscCall(MatGetLocalSize(B, &m, NULL));
1345       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1346     } else {
1347       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1348       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as a simple MPI_Gather,
1349         which makes the communication very inefficient. An optimization would be to use VecScatterCreateToZero to gather B onto rank 0
1350         and then, on rank 0, re-arrange B into the desired order, which is a purely local operation.
1351       */
1352 
1353       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1354       /* wrap dense rhs matrix B into a vector v_mpi */
1355       PetscCall(MatGetLocalSize(B, &m, NULL));
1356       PetscCall(MatDenseGetArray(B, &bray));
1357       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1358       PetscCall(MatDenseRestoreArray(B, &bray));
1359 
1360       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1361       if (!mumps->myid) {
1362         PetscInt *idx;
1363         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1364         PetscCall(PetscMalloc1(nrhs * M, &idx));
1365         PetscCall(MatGetOwnershipRanges(B, &rstart));
1366         k = 0;
1367         for (proc = 0; proc < mumps->petsc_size; proc++) {
1368           for (j = 0; j < nrhs; j++) {
1369             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1370           }
1371         }
1372 
1373         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1374         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1375         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1376       } else {
1377         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1378         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1379         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1380       }
1381       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1382       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1383       PetscCall(ISDestroy(&is_to));
1384       PetscCall(ISDestroy(&is_from));
1385       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1386 
1387       if (!mumps->myid) { /* define rhs on the host */
1388         PetscCall(VecGetArray(b_seq, &bray));
1389         mumps->id.rhs = (MumpsScalar *)bray;
1390         PetscCall(VecRestoreArray(b_seq, &bray));
1391       }
1392     }
1393   } else { /* sparse B */
1394     b = (Mat_MPIAIJ *)Bt->data;
1395 
1396     /* wrap dense X into a vector v_mpi */
1397     PetscCall(MatGetLocalSize(X, &m, NULL));
1398     PetscCall(MatDenseGetArray(X, &bray));
1399     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1400     PetscCall(MatDenseRestoreArray(X, &bray));
1401 
1402     if (!mumps->myid) {
1403       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1404       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1405       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1406       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1407       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1408     } else {
1409       mumps->id.irhs_ptr    = NULL;
1410       mumps->id.irhs_sparse = NULL;
1411       mumps->id.nz_rhs      = 0;
1412       mumps->id.rhs_sparse  = NULL;
1413     }
1414   }
1415 
1416   /* solve phase */
1417   mumps->id.job = JOB_SOLVE;
1418   PetscMUMPS_c(mumps);
1419   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d", mumps->id.INFOG(1));
1420 
1421   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1422   PetscCall(MatDenseGetArray(X, &array));
1423   PetscCall(VecPlaceArray(v_mpi, array));
1424 
1425   /* create scatter scat_sol */
1426   PetscCall(MatGetOwnershipRanges(X, &rstart));
1427   /* iidx: index for scatter mumps solution to petsc X */
1428 
1429   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1430   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
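  /* Build idxx: for each entry of the MUMPS local solution, compute its destination index in v_mpi (which wraps X's local
     column-major arrays). Global row isol_loc[i], owned by the proc with offset myrstart and m local rows, goes to position
     (isol_loc[i] - myrstart) + myrstart*nrhs for the first right-hand side and shifts by m for each subsequent one. */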
1431   for (i = 0; i < lsol_loc; i++) {
1432     isol_loc[i] -= 1; /* change Fortran style to C style. sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th RHS */
1433 
1434     for (proc = 0; proc < mumps->petsc_size; proc++) {
1435       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1436         myrstart = rstart[proc];
1437         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1438         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1439         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1440         break;
1441       }
1442     }
1443 
1444     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1445   }
1446   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1447   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1448   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1449   PetscCall(ISDestroy(&is_from));
1450   PetscCall(ISDestroy(&is_to));
1451   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1452   PetscCall(MatDenseRestoreArray(X, &array));
1453 
1454   /* free spaces */
1455   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1456   mumps->id.isol_loc = isol_loc_save;
1457 
1458   PetscCall(PetscFree2(sol_loc, isol_loc));
1459   PetscCall(PetscFree(idxx));
1460   PetscCall(VecDestroy(&msol_loc));
1461   PetscCall(VecDestroy(&v_mpi));
1462   if (!denseB) {
1463     if (!mumps->myid) {
1464       b = (Mat_MPIAIJ *)Bt->data;
1465       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1466       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1467       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1468     }
1469   } else {
1470     if (mumps->ICNTL20 == 0) {
1471       PetscCall(VecDestroy(&b_seq));
1472       PetscCall(VecScatterDestroy(&scat_rhs));
1473     }
1474   }
1475   PetscCall(VecScatterDestroy(&scat_sol));
1476   PetscCall(PetscLogFlops(2.0 * nrhs * mumps->id.RINFO(3)));
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1481 {
1482   Mat_MUMPS    *mumps    = (Mat_MUMPS *)A->data;
1483   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1484 
1485   PetscFunctionBegin;
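  /* ICNTL(9) != 1 tells MUMPS to solve with A^T instead of A; temporarily set it to 0 and restore the cached value afterwards */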
1486   mumps->id.ICNTL(9) = 0;
1487   PetscCall(MatMatSolve_MUMPS(A, B, X));
1488   mumps->id.ICNTL(9) = oldvalue;
1489   PetscFunctionReturn(PETSC_SUCCESS);
1490 }
1491 
1492 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1493 {
1494   PetscBool flg;
1495   Mat       B;
1496 
1497   PetscFunctionBegin;
1498   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1499   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be a MATAIJ matrix");
1500 
1501   /* Create B=Bt^T that uses Bt's data structure */
1502   PetscCall(MatCreateTranspose(Bt, &B));
1503 
1504   PetscCall(MatMatSolve_MUMPS(A, B, X));
1505   PetscCall(MatDestroy(&B));
1506   PetscFunctionReturn(PETSC_SUCCESS);
1507 }
1508 
1509 #if !defined(PETSC_USE_COMPLEX)
1510 /*
1511   input:
1512    F:        numeric factor
1513   output:
1514    nneg:     total number of negative pivots
1515    nzero:    total number of zero pivots
1516    npos:     (global dimension of F) - nneg - nzero
1517 */
1518 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1519 {
1520   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1521   PetscMPIInt size;
1522 
1523   PetscFunctionBegin;
1524   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1525   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1526   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1527   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));
1528   if (nneg) *nneg = mumps->id.INFOG(12);
1529   if (nzero || npos) {
1530     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1531     if (nzero) *nzero = mumps->id.INFOG(28);
1532     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1533   }
1534   PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536 #endif
1537 
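/* Gather the matrix triples irn/jcn/val from all ranks of the (small) OpenMP sub-communicator onto its master rank, since
   only the master ranks participate in the MUMPS computation when -mat_mumps_use_omp_threads is used */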
1538 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1539 {
1540   PetscInt       i, nreqs;
1541   PetscMUMPSInt *irn, *jcn;
1542   PetscMPIInt    count;
1543   PetscInt64     totnnz, remain;
1544   const PetscInt osize = mumps->omp_comm_size;
1545   PetscScalar   *val;
1546 
1547   PetscFunctionBegin;
1548   if (osize > 1) {
1549     if (reuse == MAT_INITIAL_MATRIX) {
1550       /* master first gathers counts of nonzeros to receive */
1551       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1552       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1553 
1554       /* Then each computes number of send/recvs */
1555       if (mumps->is_omp_master) {
1556         /* Start from 1 since self communication is not done via MPI */
1557         nreqs = 0;
1558         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1559       } else {
1560         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1561       }
1562       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1563 
1564       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the other ranks.
1565          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1566          might even be a prime number > 2^31-1 (so it cannot be split evenly with a larger datatype), we have to slice the messages.
1567          Since omp_comm_size is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1568        */
1569       nreqs = 0; /* counter for actual send/recvs */
1570       if (mumps->is_omp_master) {
1571         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1572         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1573         PetscCall(PetscMalloc1(totnnz, &val));
1574 
1575         /* Self communication */
1576         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1577         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1578         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1579 
1580         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1581         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1582         PetscCall(PetscFree(mumps->val_alloc));
1583         mumps->nnz = totnnz;
1584         mumps->irn = irn;
1585         mumps->jcn = jcn;
1586         mumps->val = mumps->val_alloc = val;
1587 
1588         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1589         jcn += mumps->recvcount[0];
1590         val += mumps->recvcount[0];
1591 
1592         /* Remote communication */
1593         for (i = 1; i < osize; i++) {
1594           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1595           remain = mumps->recvcount[i] - count;
1596           while (count > 0) {
1597             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1598             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1599             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1600             irn += count;
1601             jcn += count;
1602             val += count;
1603             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1604             remain -= count;
1605           }
1606         }
1607       } else {
1608         irn    = mumps->irn;
1609         jcn    = mumps->jcn;
1610         val    = mumps->val;
1611         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1612         remain = mumps->nnz - count;
1613         while (count > 0) {
1614           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1615           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1616           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1617           irn += count;
1618           jcn += count;
1619           val += count;
1620           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1621           remain -= count;
1622         }
1623       }
1624     } else {
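      /* MAT_REUSE_MATRIX: the sparsity pattern (irn/jcn) gathered during the MAT_INITIAL_MATRIX call is still valid, so only the numerical values need to be re-gathered */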
1625       nreqs = 0;
1626       if (mumps->is_omp_master) {
1627         val = mumps->val + mumps->recvcount[0];
1628         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1629           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1630           remain = mumps->recvcount[i] - count;
1631           while (count > 0) {
1632             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1633             val += count;
1634             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1635             remain -= count;
1636           }
1637         }
1638       } else {
1639         val    = mumps->val;
1640         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1641         remain = mumps->nnz - count;
1642         while (count > 0) {
1643           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1644           val += count;
1645           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1646           remain -= count;
1647         }
1648       }
1649     }
1650     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1651     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1652   }
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1657 {
1658   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1659   PetscBool  isMPIAIJ;
1660 
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
1663     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1664     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1665     PetscFunctionReturn(PETSC_SUCCESS);
1666   }
1667 
1668   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1669   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1670 
1671   /* numerical factorization phase */
1672   mumps->id.job = JOB_FACTNUMERIC;
1673   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1674     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1675   } else {
1676     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1677   }
1678   PetscMUMPS_c(mumps);
1679   if (mumps->id.INFOG(1) < 0) {
1680     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
1681     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1682       PetscCall(PetscInfo(F, "matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1683       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1684     } else if (mumps->id.INFOG(1) == -13) {
1685       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1686       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1687     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1688       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1689       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1690     } else {
1691       PetscCall(PetscInfo(F, "MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1692       F->factorerrortype = MAT_FACTOR_OTHER;
1693     }
1694   }
1695   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "  mumps->id.ICNTL(16):=%d", mumps->id.ICNTL(16));
1696 
1697   F->assembled = PETSC_TRUE;
1698 
1699   if (F->schur) { /* reset Schur status to unfactored */
1700 #if defined(PETSC_HAVE_CUDA)
1701     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1702 #endif
1703     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1704       mumps->id.ICNTL(19) = 2;
1705       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1706     }
1707     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1708   }
1709 
1710   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1711   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1712 
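  /* Ranks that are not OpenMP masters never call MUMPS, so their INFO(23) is not set by MUMPS; zero it so that lsol_loc below is 0 and no local solution entries are expected on them */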
1713   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1714   if (mumps->petsc_size > 1) {
1715     PetscInt     lsol_loc;
1716     PetscScalar *sol_loc;
1717 
1718     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
1719 
1720     /* distributed solution; Create x_seq=sol_loc for repeated use */
1721     if (mumps->x_seq) {
1722       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1723       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1724       PetscCall(VecDestroy(&mumps->x_seq));
1725     }
1726     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1727     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
1728     mumps->id.lsol_loc = lsol_loc;
1729     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1730     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
1731   }
1732   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1733   PetscFunctionReturn(PETSC_SUCCESS);
1734 }
1735 
1736 /* Sets MUMPS options from the options database */
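/* For example (a hypothetical command line, assuming the factorization is reached through PCLU/PCCHOLESKY):
     -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30 -mat_mumps_cntl_1 0.01
   Each -mat_mumps_icntl_<k> / -mat_mumps_cntl_<k> option read below is forwarded to the corresponding MUMPS ICNTL()/CNTL() entry. */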
1737 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1738 {
1739   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
1740   PetscMUMPSInt icntl = 0, size, *listvar_schur;
1741   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
1742   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
1743   MumpsScalar  *arr;
1744 
1745   PetscFunctionBegin;
1746   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
1747   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1748     PetscInt nthreads   = 0;
1749     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1750     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1751 
1752     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1753     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
1754     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
1755 
1756     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
1757     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1758     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1759     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
1760     if (mumps->use_petsc_omp_support) {
1761       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
1762                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1763       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
1764 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1765       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
1766       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
1767 #endif
1768     } else {
1769       mumps->omp_comm      = PETSC_COMM_SELF;
1770       mumps->mumps_comm    = mumps->petsc_comm;
1771       mumps->is_omp_master = PETSC_TRUE;
1772     }
1773     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
1774     mumps->reqs = NULL;
1775     mumps->tag  = 0;
1776 
1777     if (mumps->mumps_comm != MPI_COMM_NULL) {
1778       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1779         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1780         MPI_Comm comm;
1781         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
1782         mumps->mumps_comm = comm;
1783       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
1784     }
1785 
1786     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1787     mumps->id.job          = JOB_INIT;
1788     mumps->id.par          = 1; /* host participates in factorization and solve */
1789     mumps->id.sym          = mumps->sym;
1790 
1791     size          = mumps->id.size_schur;
1792     arr           = mumps->id.schur;
1793     listvar_schur = mumps->id.listvar_schur;
1794     PetscMUMPS_c(mumps);
1795     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS: INFOG(1)=%d", mumps->id.INFOG(1));
1796     /* restore cached ICNTL and CNTL values */
1797     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
1798     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
1799     PetscCall(PetscFree(mumps->ICNTL_pre));
1800     PetscCall(PetscFree(mumps->CNTL_pre));
1801 
1802     if (schur) {
1803       mumps->id.size_schur    = size;
1804       mumps->id.schur_lld     = size;
1805       mumps->id.schur         = arr;
1806       mumps->id.listvar_schur = listvar_schur;
1807       if (mumps->petsc_size > 1) {
1808         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1809 
1810         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
1811         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1812         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
1813         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
1814       } else {
1815         if (F->factortype == MAT_FACTOR_LU) {
1816           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1817         } else {
1818           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1819         }
1820       }
1821       mumps->id.ICNTL(26) = -1;
1822     }
1823 
1824     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1825        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1826      */
1827     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
1828     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
1829 
1830     mumps->scat_rhs = NULL;
1831     mumps->scat_sol = NULL;
1832 
1833     /* set PETSc-MUMPS default options - override MUMPS default */
1834     mumps->id.ICNTL(3) = 0;
1835     mumps->id.ICNTL(4) = 0;
1836     if (mumps->petsc_size == 1) {
1837       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1838       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
1839     } else {
1840       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1841       mumps->id.ICNTL(21) = 1; /* distributed solution */
1842     }
1843   }
1844   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
1845   if (flg) mumps->id.ICNTL(1) = icntl;
1846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
1847   if (flg) mumps->id.ICNTL(2) = icntl;
1848   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
1849   if (flg) mumps->id.ICNTL(3) = icntl;
1850 
1851   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
1852   if (flg) mumps->id.ICNTL(4) = icntl;
1853   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1854 
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
1856   if (flg) mumps->id.ICNTL(6) = icntl;
1857 
1858   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
1859   if (flg) {
1860     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1861     mumps->id.ICNTL(7) = icntl;
1862   }
1863 
1864   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
1865   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1866   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
1867   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
1868   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
1869   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
1870   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
1871   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
1872   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1873   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
1874   if (flg) {
1875     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
1876     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1877   }
1878   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
1879   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1880     PetscCall(MatDestroy(&F->schur));
1881     PetscCall(MatMumpsResetSchur_Private(mumps));
1882   }
1883 
1884   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1885      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1886      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1887      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1888      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1889      In short, we could not use distributed RHS with MPICH until v4.0b1.
1890    */
1891 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1892   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1893 #else
1894   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
1895 #endif
1896   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
1897   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
1898 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
1899   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1900 #endif
1901   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1902 
1903   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
1904   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
1906   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */ }
1907 
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase when a Schur complement matrix is used", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
1910   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
1911   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
1913   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
1915   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1916   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
1917   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
1920 
1921   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
1922   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
1923   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
1924   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
1927 
1928   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1929 
1930   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
1931   if (ninfo) {
1932     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
1933     PetscCall(PetscMalloc1(ninfo, &mumps->info));
1934     mumps->ninfo = ninfo;
1935     for (i = 0; i < ninfo; i++) {
1936       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
1937       mumps->info[i] = info[i];
1938     }
1939   }
1940   PetscOptionsEnd();
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943 
1944 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
1945 {
1946   PetscFunctionBegin;
1947   if (mumps->id.INFOG(1) < 0) {
1948     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in analysis phase: INFOG(1)=%d", mumps->id.INFOG(1));
1949     if (mumps->id.INFOG(1) == -6) {
1950       PetscCall(PetscInfo(F, "matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1951       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1952     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1953       PetscCall(PetscInfo(F, "problem of workspace, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1954       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1955     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1956       PetscCall(PetscInfo(F, "Empty matrix\n"));
1957     } else {
1958       PetscCall(PetscInfo(F, "Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1959       F->factorerrortype = MAT_FACTOR_OTHER;
1960     }
1961   }
1962   PetscFunctionReturn(PETSC_SUCCESS);
1963 }
1964 
1965 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
1966 {
1967   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
1968   Vec            b;
1969   const PetscInt M = A->rmap->N;
1970 
1971   PetscFunctionBegin;
1972   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1973     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1974     PetscFunctionReturn(PETSC_SUCCESS);
1975   }
1976 
1977   /* Set MUMPS options from the options database */
1978   PetscCall(MatSetFromOptions_MUMPS(F, A));
1979 
1980   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1981   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
1982 
1983   /* analysis phase */
1984   mumps->id.job = JOB_FACTSYMBOLIC;
1985   mumps->id.n   = M;
1986   switch (mumps->id.ICNTL(18)) {
1987   case 0: /* centralized assembled matrix input */
1988     if (!mumps->myid) {
1989       mumps->id.nnz = mumps->nnz;
1990       mumps->id.irn = mumps->irn;
1991       mumps->id.jcn = mumps->jcn;
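      /* When ICNTL(6) > 1, MUMPS may use the numerical values already during analysis (to permute to a zero-free diagonal and/or to scale), so pass them here */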
1992       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
1993       if (r) {
1994         mumps->id.ICNTL(7) = 1;
1995         if (!mumps->myid) {
1996           const PetscInt *idx;
1997           PetscInt        i;
1998 
1999           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2000           PetscCall(ISGetIndices(r, &idx));
2001           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2002           PetscCall(ISRestoreIndices(r, &idx));
2003         }
2004       }
2005     }
2006     break;
2007   case 3: /* distributed assembled matrix input (size>1) */
2008     mumps->id.nnz_loc = mumps->nnz;
2009     mumps->id.irn_loc = mumps->irn;
2010     mumps->id.jcn_loc = mumps->jcn;
2011     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2012     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2013       PetscCall(MatCreateVecs(A, NULL, &b));
2014       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2015       PetscCall(VecDestroy(&b));
2016     }
2017     break;
2018   }
2019   PetscMUMPS_c(mumps);
2020   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2021 
2022   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2023   F->ops->solve             = MatSolve_MUMPS;
2024   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2025   F->ops->matsolve          = MatMatSolve_MUMPS;
2026   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2027   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2028 
2029   mumps->matstruc = SAME_NONZERO_PATTERN;
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 /* Note the Petsc r and c permutations are ignored */
2034 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2035 {
2036   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2037   Vec            b;
2038   const PetscInt M = A->rmap->N;
2039 
2040   PetscFunctionBegin;
2041   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2042     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2043     PetscFunctionReturn(PETSC_SUCCESS);
2044   }
2045 
2046   /* Set MUMPS options from the options database */
2047   PetscCall(MatSetFromOptions_MUMPS(F, A));
2048 
2049   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2050   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2051 
2052   /* analysis phase */
2053   mumps->id.job = JOB_FACTSYMBOLIC;
2054   mumps->id.n   = M;
2055   switch (mumps->id.ICNTL(18)) {
2056   case 0: /* centralized assembled matrix input */
2057     if (!mumps->myid) {
2058       mumps->id.nnz = mumps->nnz;
2059       mumps->id.irn = mumps->irn;
2060       mumps->id.jcn = mumps->jcn;
2061       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2062     }
2063     break;
2064   case 3: /* distributed assembled matrix input (size>1) */
2065     mumps->id.nnz_loc = mumps->nnz;
2066     mumps->id.irn_loc = mumps->irn;
2067     mumps->id.jcn_loc = mumps->jcn;
2068     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2069     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2070       PetscCall(MatCreateVecs(A, NULL, &b));
2071       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2072       PetscCall(VecDestroy(&b));
2073     }
2074     break;
2075   }
2076   PetscMUMPS_c(mumps);
2077   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2078 
2079   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2080   F->ops->solve             = MatSolve_MUMPS;
2081   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2082   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2083 
2084   mumps->matstruc = SAME_NONZERO_PATTERN;
2085   PetscFunctionReturn(PETSC_SUCCESS);
2086 }
2087 
2088 /* Note the Petsc r permutation and factor info are ignored */
2089 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2090 {
2091   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2092   Vec            b;
2093   const PetscInt M = A->rmap->N;
2094 
2095   PetscFunctionBegin;
2096   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2097     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2098     PetscFunctionReturn(PETSC_SUCCESS);
2099   }
2100 
2101   /* Set MUMPS options from the options database */
2102   PetscCall(MatSetFromOptions_MUMPS(F, A));
2103 
2104   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2105   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2106 
2107   /* analysis phase */
2108   mumps->id.job = JOB_FACTSYMBOLIC;
2109   mumps->id.n   = M;
2110   switch (mumps->id.ICNTL(18)) {
2111   case 0: /* centralized assembled matrix input */
2112     if (!mumps->myid) {
2113       mumps->id.nnz = mumps->nnz;
2114       mumps->id.irn = mumps->irn;
2115       mumps->id.jcn = mumps->jcn;
2116       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2117     }
2118     break;
2119   case 3: /* distributed assembled matrix input (size>1) */
2120     mumps->id.nnz_loc = mumps->nnz;
2121     mumps->id.irn_loc = mumps->irn;
2122     mumps->id.jcn_loc = mumps->jcn;
2123     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2124     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2125       PetscCall(MatCreateVecs(A, NULL, &b));
2126       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2127       PetscCall(VecDestroy(&b));
2128     }
2129     break;
2130   }
2131   PetscMUMPS_c(mumps);
2132   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2133 
2134   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2135   F->ops->solve                 = MatSolve_MUMPS;
2136   F->ops->solvetranspose        = MatSolve_MUMPS;
2137   F->ops->matsolve              = MatMatSolve_MUMPS;
2138   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2139   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2140 #if defined(PETSC_USE_COMPLEX)
2141   F->ops->getinertia = NULL;
2142 #else
2143   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2144 #endif
2145 
2146   mumps->matstruc = SAME_NONZERO_PATTERN;
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2151 {
2152   PetscBool         iascii;
2153   PetscViewerFormat format;
2154   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2155 
2156   PetscFunctionBegin;
2157   /* check if matrix is mumps type */
2158   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2159 
2160   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2161   if (iascii) {
2162     PetscCall(PetscViewerGetFormat(viewer, &format));
2163     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2164       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2165       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2166         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2167         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2168         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2169         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2170         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2171         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2172         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2173         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2174         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2175         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2176         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2177         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2178         if (mumps->id.ICNTL(11) > 0) {
2179           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2180           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2181           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2182           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2183           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2184           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2185         }
2186         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2187         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2188         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2189         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2190         /* ICNTL(16) and ICNTL(17) not used */
2191         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2192         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2193         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2194         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2195         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2196         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2197 
2198         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2199         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2200         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2201         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2202         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2203         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2204 
2205         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2206         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors are discarded in the solve phase):   %d\n", mumps->id.ICNTL(31)));
2207         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2208         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2209         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2210         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2211 
2212         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2213         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2214         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2215         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2216         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2217         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2218 
2219         /* information local to each processor */
2220         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2221         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2222         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2223         PetscCall(PetscViewerFlush(viewer));
2224         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2225         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2226         PetscCall(PetscViewerFlush(viewer));
2227         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2228         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2229         PetscCall(PetscViewerFlush(viewer));
2230 
2231         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
2232         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2233         PetscCall(PetscViewerFlush(viewer));
2234 
2235         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
2236         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2237         PetscCall(PetscViewerFlush(viewer));
2238 
2239         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2240         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2241         PetscCall(PetscViewerFlush(viewer));
2242 
2243         if (mumps->ninfo && mumps->ninfo <= 80) {
2244           PetscInt i;
2245           for (i = 0; i < mumps->ninfo; i++) {
2246             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2247             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2248             PetscCall(PetscViewerFlush(viewer));
2249           }
2250         }
2251         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2252       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2253 
2254       if (mumps->myid == 0) { /* information from the host */
2255         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2259 
2260         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2266         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2294       }
2295     }
2296   }
2297   PetscFunctionReturn(PETSC_SUCCESS);
2298 }
2299 
2300 PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2301 {
2302   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2303 
2304   PetscFunctionBegin;
2305   info->block_size        = 1.0;
2306   info->nz_allocated      = mumps->id.INFOG(20);
2307   info->nz_used           = mumps->id.INFOG(20);
2308   info->nz_unneeded       = 0.0;
2309   info->assemblies        = 0.0;
2310   info->mallocs           = 0.0;
2311   info->memory            = 0.0;
2312   info->fill_ratio_given  = 0;
2313   info->fill_ratio_needed = 0;
2314   info->factor_mallocs    = 0;
2315   PetscFunctionReturn(PETSC_SUCCESS);
2316 }
2317 
2318 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2319 {
2320   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2321   const PetscScalar *arr;
2322   const PetscInt    *idxs;
2323   PetscInt           size, i;
2324 
2325   PetscFunctionBegin;
2326   PetscCall(ISGetLocalSize(is, &size));
2327   /* Schur complement matrix */
2328   PetscCall(MatDestroy(&F->schur));
2329   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2330   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2331   mumps->id.schur      = (MumpsScalar *)arr;
2332   mumps->id.size_schur = size;
2333   mumps->id.schur_lld  = size;
2334   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2335   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2336 
2337   /* MUMPS expects Fortran style indices */
2338   PetscCall(PetscFree(mumps->id.listvar_schur));
2339   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2340   PetscCall(ISGetIndices(is, &idxs));
2341   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2342   PetscCall(ISRestoreIndices(is, &idxs));
2343   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2344   mumps->id.ICNTL(26) = -1;
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2349 {
2350   Mat          St;
2351   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2352   PetscScalar *array;
2353 #if defined(PETSC_USE_COMPLEX)
2354   PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2355 #endif
2356 
2357   PetscFunctionBegin;
2358   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2359   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2360   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2361   PetscCall(MatSetType(St, MATDENSE));
2362   PetscCall(MatSetUp(St));
2363   PetscCall(MatDenseGetArray(St, &array));
2364   if (!mumps->sym) {                /* MUMPS always returns a full matrix */
2365     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2366       PetscInt i, j, N = mumps->id.size_schur;
2367       for (i = 0; i < N; i++) {
2368         for (j = 0; j < N; j++) {
2369 #if !defined(PETSC_USE_COMPLEX)
2370           PetscScalar val = mumps->id.schur[i * N + j];
2371 #else
2372           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2373 #endif
2374           array[j * N + i] = val;
2375         }
2376       }
2377     } else { /* stored by columns */
2378       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2379     }
2380   } else {                          /* either full or lower-triangular (not packed) */
2381     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2382       PetscInt i, j, N = mumps->id.size_schur;
2383       for (i = 0; i < N; i++) {
2384         for (j = i; j < N; j++) {
2385 #if !defined(PETSC_USE_COMPLEX)
2386           PetscScalar val = mumps->id.schur[i * N + j];
2387 #else
2388           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2389 #endif
2390           array[i * N + j] = val;
2391           array[j * N + i] = val;
2392         }
2393       }
2394     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2395       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2396     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2397       PetscInt i, j, N = mumps->id.size_schur;
2398       for (i = 0; i < N; i++) {
2399         for (j = 0; j < i + 1; j++) {
2400 #if !defined(PETSC_USE_COMPLEX)
2401           PetscScalar val = mumps->id.schur[i * N + j];
2402 #else
2403           PetscScalar val = mumps->id.schur[i * N + j].r + im * mumps->id.schur[i * N + j].i;
2404 #endif
2405           array[i * N + j] = val;
2406           array[j * N + i] = val;
2407         }
2408       }
2409     }
2410   }
2411   PetscCall(MatDenseRestoreArray(St, &array));
2412   *S = St;
2413   PetscFunctionReturn(PETSC_SUCCESS);
2414 }
2415 
2416 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2417 {
2418   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2419 
2420   PetscFunctionBegin;
2421   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2422     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2423     for (i = 0; i < nICNTL_pre; ++i)
2424       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2425     if (i == nICNTL_pre) {                             /* not already cached */
2426       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2427       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2428       mumps->ICNTL_pre[0]++;
2429     }
2430     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2431     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2432   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2433   PetscFunctionReturn(PETSC_SUCCESS);
2434 }
2435 
2436 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2437 {
2438   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2439 
2440   PetscFunctionBegin;
2441   *ival = mumps->id.ICNTL(icntl);
2442   PetscFunctionReturn(PETSC_SUCCESS);
2443 }
2444 
2445 /*@
2446   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2447 
2448    Logically Collective
2449 
2450    Input Parameters:
2451 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2452 .  icntl - index of MUMPS parameter array ICNTL()
2453 -  ival - value of MUMPS ICNTL(icntl)
2454 
2455   Options Database Key:
2456 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2457 
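  Example Usage:
  A minimal sketch, assuming A is an already assembled `MATAIJ` matrix; the index 14 and the value 30 are only illustrative:
.vb
      Mat F;
      PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
      PetscCall(MatMumpsSetIcntl(F, 14, 30)); /* ICNTL(14): increase the estimated working space by 30 percent */
      /* ... continue with MatLUFactorSymbolic()/MatLUFactorNumeric(), or hand F to a KSP/PC ... */
.ve
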
2458    Level: beginner
2459 
2460    References:
2461 .  * - MUMPS Users' Guide
2462 
2463 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2464 @*/
2465 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2466 {
2467   PetscFunctionBegin;
2468   PetscValidType(F, 1);
2469   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2470   PetscValidLogicalCollectiveInt(F, icntl, 2);
2471   PetscValidLogicalCollectiveInt(F, ival, 3);
2472   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2473   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2474   PetscFunctionReturn(PETSC_SUCCESS);
2475 }
2476 
2477 /*@
2478   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2479 
2480    Logically Collective
2481 
2482    Input Parameters:
2483 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2484 -  icntl - index of MUMPS parameter array ICNTL()
2485 
2486   Output Parameter:
2487 .  ival - value of MUMPS ICNTL(icntl)
2488 
2489    Level: beginner
2490 
2491    References:
2492 .  * - MUMPS Users' Guide
2493 
2494 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2495 @*/
2496 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2497 {
2498   PetscFunctionBegin;
2499   PetscValidType(F, 1);
2500   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2501   PetscValidLogicalCollectiveInt(F, icntl, 2);
2502   PetscValidIntPointer(ival, 3);
2503   PetscCheck(icntl >= 1 && icntl <= 38, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2504   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2505   PetscFunctionReturn(PETSC_SUCCESS);
2506 }
2507 
2508 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2509 {
2510   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2511 
2512   PetscFunctionBegin;
2513   if (mumps->id.job == JOB_NULL) {
2514     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2515     for (i = 0; i < nCNTL_pre; ++i)
2516       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2517     if (i == nCNTL_pre) {
2518       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2519       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2520       mumps->CNTL_pre[0]++;
2521     }
2522     mumps->CNTL_pre[1 + 2 * i] = icntl;
2523     mumps->CNTL_pre[2 + 2 * i] = val;
2524   } else mumps->id.CNTL(icntl) = val;
2525   PetscFunctionReturn(PETSC_SUCCESS);
2526 }
2527 
2528 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2529 {
2530   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2531 
2532   PetscFunctionBegin;
2533   *val = mumps->id.CNTL(icntl);
2534   PetscFunctionReturn(PETSC_SUCCESS);
2535 }
2536 
2537 /*@
2538   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2539 
2540    Logically Collective
2541 
2542    Input Parameters:
2543 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2544 .  icntl - index of MUMPS parameter array CNTL()
2545 -  val - value of MUMPS CNTL(icntl)
2546 
2547   Options Database Key:
2548 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2549 
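  Example Usage:
  A minimal sketch, assuming F is a MUMPS factor matrix previously obtained with `MatGetFactor()`; the threshold 0.01 is only illustrative:
.vb
      PetscCall(MatMumpsSetCntl(F, 1, 0.01)); /* CNTL(1): relative pivoting threshold */
.ve
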
2550    Level: beginner
2551 
2552    References:
2553 .  * - MUMPS Users' Guide
2554 
2555 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2556 @*/
2557 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2558 {
2559   PetscFunctionBegin;
2560   PetscValidType(F, 1);
2561   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2562   PetscValidLogicalCollectiveInt(F, icntl, 2);
2563   PetscValidLogicalCollectiveReal(F, val, 3);
2564   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2565   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2566   PetscFunctionReturn(PETSC_SUCCESS);
2567 }
2568 
2569 /*@
2570   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2571 
2572    Logically Collective
2573 
2574    Input Parameters:
2575 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2576 -  icntl - index of MUMPS parameter array CNTL()
2577 
2578   Output Parameter:
2579 .  val - value of MUMPS CNTL(icntl)
2580 
2581    Level: beginner
2582 
2583    References:
2584 .  * - MUMPS Users' Guide
2585 
2586 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2587 @*/
2588 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2589 {
2590   PetscFunctionBegin;
2591   PetscValidType(F, 1);
2592   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2593   PetscValidLogicalCollectiveInt(F, icntl, 2);
2594   PetscValidRealPointer(val, 3);
2595   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2596   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2597   PetscFunctionReturn(PETSC_SUCCESS);
2598 }
2599 
2600 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2601 {
2602   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2603 
2604   PetscFunctionBegin;
2605   *info = mumps->id.INFO(icntl);
2606   PetscFunctionReturn(PETSC_SUCCESS);
2607 }
2608 
2609 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2610 {
2611   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2612 
2613   PetscFunctionBegin;
2614   *infog = mumps->id.INFOG(icntl);
2615   PetscFunctionReturn(PETSC_SUCCESS);
2616 }
2617 
2618 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2619 {
2620   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2621 
2622   PetscFunctionBegin;
2623   *rinfo = mumps->id.RINFO(icntl);
2624   PetscFunctionReturn(PETSC_SUCCESS);
2625 }
2626 
2627 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2628 {
2629   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2630 
2631   PetscFunctionBegin;
2632   *rinfog = mumps->id.RINFOG(icntl);
2633   PetscFunctionReturn(PETSC_SUCCESS);
2634 }
2635 
2636 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2637 {
2638   Mat          Bt = NULL, Btseq = NULL;
2639   PetscBool    flg;
2640   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2641   PetscScalar *aa;
2642   PetscInt     spnr, *ia, *ja, M, nrhs;
2643 
2644   PetscFunctionBegin;
2645   PetscValidPointer(spRHS, 2);
2646   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2647   if (flg) {
2648     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2649   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
2650 
2651   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2652 
2653   if (mumps->petsc_size > 1) {
2654     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2655     Btseq         = b->A;
2656   } else {
2657     Btseq = Bt;
2658   }
2659 
2660   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2661   mumps->id.nrhs = nrhs;
2662   mumps->id.lrhs = M;
2663   mumps->id.rhs  = NULL;
2664 
2665   if (!mumps->myid) {
2666     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2667     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2668     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2669     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2670     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2671   } else {
2672     mumps->id.irhs_ptr    = NULL;
2673     mumps->id.irhs_sparse = NULL;
2674     mumps->id.nz_rhs      = 0;
2675     mumps->id.rhs_sparse  = NULL;
2676   }
2677   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2678   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2679 
2680   /* solve phase */
2681   mumps->id.job = JOB_SOLVE;
2682   PetscMUMPS_c(mumps);
2683   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d", mumps->id.INFOG(1), mumps->id.INFO(2));
2684 
2685   if (!mumps->myid) {
2686     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
2687     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2688     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2689   }
2690   PetscFunctionReturn(PETSC_SUCCESS);
2691 }
2692 
2693 /*@
2694   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
2695 
2696    Logically Collective
2697 
2698    Input Parameter:
2699 .  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2700 
2701   Output Parameter:
2702 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
2703 
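  Example Usage:
  A sketch of requesting selected entries of inv(A); Asp is an assumed sequential `MATAIJ` matrix whose nonzero pattern marks the requested entries, and F is the MUMPS factor of A:
.vb
      Mat spRHS;
      PetscCall(MatCreateTranspose(Asp, &spRHS)); /* wrap Asp so spRHS has the MATTRANSPOSEVIRTUAL type required here */
      PetscCall(MatMumpsGetInverse(F, spRHS));    /* fill in the requested entries of inv(A) */
      PetscCall(MatDestroy(&spRHS));
.ve
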
2704    Level: beginner
2705 
2706    References:
2707 .  * - MUMPS Users' Guide
2708 
2709 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
2710 @*/
2711 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
2712 {
2713   PetscFunctionBegin;
2714   PetscValidType(F, 1);
2715   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2716   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
2717   PetscFunctionReturn(PETSC_SUCCESS);
2718 }
2719 
2720 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
2721 {
2722   Mat spRHS;
2723 
2724   PetscFunctionBegin;
2725   PetscCall(MatCreateTranspose(spRHST, &spRHS));
2726   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
2727   PetscCall(MatDestroy(&spRHS));
2728   PetscFunctionReturn(PETSC_SUCCESS);
2729 }
2730 
2731 /*@
2732   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
2733 
2734    Logically Collective
2735 
2736    Input Parameter:
2737 .  F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2738 
2739   Output Parameter:
2740 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
2741 
2742    Level: beginner
2743 
2744    References:
2745 .  * - MUMPS Users' Guide
2746 
2747 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2748 @*/
2749 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
2750 {
2751   PetscBool flg;
2752 
2753   PetscFunctionBegin;
2754   PetscValidType(F, 1);
2755   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2756   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2757   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
2758 
2759   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
2760   PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762 
2763 /*@
2764   MatMumpsGetInfo - Get MUMPS parameter INFO()
2765 
2766    Logically Collective
2767 
2768    Input Parameters:
2769 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2770 -  icntl - index of MUMPS parameter array INFO()
2771 
2772   Output Parameter:
2773 .  ival - value of MUMPS INFO(icntl)
2774 
2775    Level: beginner
2776 
2777    References:
2778 .  * - MUMPS Users' Guide
2779 
2780 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2781 @*/
2782 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
2783 {
2784   PetscFunctionBegin;
2785   PetscValidType(F, 1);
2786   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2787   PetscValidIntPointer(ival, 3);
2788   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2789   PetscFunctionReturn(PETSC_SUCCESS);
2790 }
2791 
2792 /*@
2793   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2794 
2795    Logically Collective
2796 
2797    Input Parameters:
2798 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2799 -  icntl - index of MUMPS parameter array INFOG()
2800 
2801   Output Parameter:
2802 .  ival - value of MUMPS INFOG(icntl)
2803 
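  Example Usage:
  A sketch of querying a global statistic after the numerical factorization of a MUMPS factor F; index 28 is only one example:
.vb
      PetscInt nnull;
      PetscCall(MatMumpsGetInfog(F, 28, &nnull)); /* INFOG(28): number of null pivots encountered */
.ve
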
2804    Level: beginner
2805 
2806    References:
2807 .  * - MUMPS Users' Guide
2808 
2809 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2810 @*/
2811 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
2812 {
2813   PetscFunctionBegin;
2814   PetscValidType(F, 1);
2815   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2816   PetscValidIntPointer(ival, 3);
2817   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2818   PetscFunctionReturn(PETSC_SUCCESS);
2819 }
2820 
2821 /*@
2822   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2823 
2824    Logically Collective
2825 
2826    Input Parameters:
2827 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2828 -  icntl - index of MUMPS parameter array RINFO()
2829 
2830   Output Parameter:
2831 .  val - value of MUMPS RINFO(icntl)
2832 
2833    Level: beginner
2834 
2835    References:
2836 .  * - MUMPS Users' Guide
2837 
2838 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2839 @*/
2840 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
2841 {
2842   PetscFunctionBegin;
2843   PetscValidType(F, 1);
2844   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2845   PetscValidRealPointer(val, 3);
2846   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2847   PetscFunctionReturn(PETSC_SUCCESS);
2848 }
2849 
2850 /*@
2851   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2852 
2853    Logically Collective
2854 
2855    Input Parameters:
2856 +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2857 -  icntl - index of MUMPS parameter array RINFOG()
2858 
2859   Output Parameter:
2860 .  val - value of MUMPS RINFOG(icntl)
2861 
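  Example Usage:
  A sketch of retrieving the determinant computed by MUMPS; it assumes ICNTL(33) was set to 1 before the numerical factorization of F:
.vb
      PetscReal mantissa;
      PetscInt  exponent;
      PetscCall(MatMumpsGetRinfog(F, 12, &mantissa)); /* determinant = mantissa * 2^exponent (real case) */
      PetscCall(MatMumpsGetInfog(F, 34, &exponent));
.ve
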
2862    Level: beginner
2863 
2864    References:
2865 .  * - MUMPS Users' Guide
2866 
2867 .seealso: [](chapter_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2868 @*/
2869 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
2870 {
2871   PetscFunctionBegin;
2872   PetscValidType(F, 1);
2873   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2874   PetscValidRealPointer(val, 3);
2875   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2876   PetscFunctionReturn(PETSC_SUCCESS);
2877 }
2878 
2879 /*MC
2880   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2881   distributed and sequential matrices via the external package MUMPS.
2882 
2883   Works with `MATAIJ` and `MATSBAIJ` matrices
2884 
2885   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2886 
2887   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode.
2888   See details below.
2889 
2890   Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
2891 
2892   Options Database Keys:
2893 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2894 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2895 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2896 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2897 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2898 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2899                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2900 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2901 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2902 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2903 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2904 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2905 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2906 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2907 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2908 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2909 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2910 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
2911 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2912 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2913 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2914 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2915 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2916 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2917 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2918 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2919 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2920 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2921 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2922 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2923 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2924 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2925 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2926 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2927 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2928 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2929                                    The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2930 
2931   Level: beginner
2932 
2933     Notes:
2934     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at https://mumps-solver.org/index.php?page=doc), so attempting to use it
2935     on a Hermitian matrix results in an error.
2936 
2937     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2938     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2939 
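    To control MUMPS from code rather than from the options database, one can create the factor matrix through the `PC` and then call the
    MatMumps routines on it. A minimal sketch, assuming ksp already wraps the linear system and b, x are existing vectors; ICNTL(24) is only an example:
.vb
          Mat F;
          PC  pc;
          PetscCall(KSPGetPC(ksp, &pc));
          PetscCall(PCSetType(pc, PCLU));
          PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
          PetscCall(PCFactorSetUpMatSolverType(pc)); /* create the factor matrix so MUMPS options can be set on it */
          PetscCall(PCFactorGetMatrix(pc, &F));
          PetscCall(MatMumpsSetIcntl(F, 24, 1));     /* ICNTL(24): enable null pivot detection */
          PetscCall(KSPSolve(ksp, b, x));
.ve
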
2940     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
2941     the failure with
2942 .vb
2943           KSPGetPC(ksp,&pc);
2944           PCFactorGetMatrix(pc,&mat);
2945           MatMumpsGetInfo(mat,....);
2946           MatMumpsGetInfog(mat,....); etc.
2947 .ve
2948     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
2949 
2950     MUMPS provides 64-bit integer support in two build modes:
2951       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2952       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2953 
2954       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2955       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2956       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2957       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2958 
2959     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which works with both the --with-64-bit-indices=0 and --with-64-bit-indices=1 variants of PETSc.
2960 
2961   Two modes to run MUMPS/PETSc with OpenMP
2962 .vb
2963      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2964      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2965 .ve
2966 
2967 .vb
2968      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2969     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2970 .ve
2971 
2972    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2973    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
2974    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2975    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2976    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2977 
2978    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2979    processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2980    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank; the others in the omp_comm
2981    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2982    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2983    On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the example above, suppose your compute node has two sockets.
2984    If you interleave MPI ranks across the two sockets, i.e., even ranks are placed on socket 0 and odd ranks on socket 1, and bind
2985    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores in socket 0 and half
2986    in socket 1, which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
2987    problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2988    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2989    examine the mapping result.
2990 
2991    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
2992    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
2993    calls `omp_set_num_threads`(m) internally before calling MUMPS.
2994 
2995    References:
2996 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2997 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2998 
2999 .seealso: [](chapter_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3000 M*/
3001 
3002 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3003 {
3004   PetscFunctionBegin;
3005   *type = MATSOLVERMUMPS;
3006   PetscFunctionReturn(PETSC_SUCCESS);
3007 }
3008 
3009 /* MatGetFactor for Seq and MPI AIJ matrices */
3010 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3011 {
3012   Mat         B;
3013   Mat_MUMPS  *mumps;
3014   PetscBool   isSeqAIJ;
3015   PetscMPIInt size;
3016 
3017   PetscFunctionBegin;
3018 #if defined(PETSC_USE_COMPLEX)
3019   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3020 #endif
3021   /* Create the factorization matrix */
3022   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3023   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3024   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3025   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3026   PetscCall(MatSetUp(B));
3027 
3028   PetscCall(PetscNew(&mumps));
3029 
3030   B->ops->view    = MatView_MUMPS;
3031   B->ops->getinfo = MatGetInfo_MUMPS;
3032 
3033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3037   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3038   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3039   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3040   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3041   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3042   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3043   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3044   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3045   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3046 
3047   if (ftype == MAT_FACTOR_LU) {
3048     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3049     B->factortype            = MAT_FACTOR_LU;
3050     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3051     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3052     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3053     mumps->sym = 0;
3054   } else {
3055     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3056     B->factortype                  = MAT_FACTOR_CHOLESKY;
3057     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3058     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3059     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3060 #if defined(PETSC_USE_COMPLEX)
3061     mumps->sym = 2;
3062 #else
3063     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3064     else mumps->sym = 2;
3065 #endif
3066   }
3067 
3068   /* set solvertype */
3069   PetscCall(PetscFree(B->solvertype));
3070   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3071   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3072   if (size == 1) {
3073     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3074     B->canuseordering = PETSC_TRUE;
3075   }
3076   B->ops->destroy = MatDestroy_MUMPS;
3077   B->data         = (void *)mumps;
3078 
3079   *F               = B;
3080   mumps->id.job    = JOB_NULL;
3081   mumps->ICNTL_pre = NULL;
3082   mumps->CNTL_pre  = NULL;
3083   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3084   PetscFunctionReturn(PETSC_SUCCESS);
3085 }
3086 
3087 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3088 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3089 {
3090   Mat         B;
3091   Mat_MUMPS  *mumps;
3092   PetscBool   isSeqSBAIJ;
3093   PetscMPIInt size;
3094 
3095   PetscFunctionBegin;
3096 #if defined(PETSC_USE_COMPLEX)
3097   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian CHOLESKY Factor is not supported");
3098 #endif
3099   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3100   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3101   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3102   PetscCall(MatSetUp(B));
3103 
3104   PetscCall(PetscNew(&mumps));
3105   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3106   if (isSeqSBAIJ) {
3107     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3108   } else {
3109     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3110   }
3111 
3112   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3113   B->ops->view                   = MatView_MUMPS;
3114   B->ops->getinfo                = MatGetInfo_MUMPS;
3115 
3116   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3117   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3118   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3119   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3120   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3121   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3122   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3123   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3124   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3129 
3130   B->factortype = MAT_FACTOR_CHOLESKY;
3131 #if defined(PETSC_USE_COMPLEX)
3132   mumps->sym = 2;
3133 #else
3134   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3135   else mumps->sym = 2;
3136 #endif
3137 
3138   /* set solvertype */
3139   PetscCall(PetscFree(B->solvertype));
3140   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3141   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3142   if (size == 1) {
3143     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3144     B->canuseordering = PETSC_TRUE;
3145   }
3146   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3147   B->ops->destroy = MatDestroy_MUMPS;
3148   B->data         = (void *)mumps;
3149 
3150   *F               = B;
3151   mumps->id.job    = JOB_NULL;
3152   mumps->ICNTL_pre = NULL;
3153   mumps->CNTL_pre  = NULL;
3154   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3155   PetscFunctionReturn(PETSC_SUCCESS);
3156 }
3157 
3158 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3159 {
3160   Mat         B;
3161   Mat_MUMPS  *mumps;
3162   PetscBool   isSeqBAIJ;
3163   PetscMPIInt size;
3164 
3165   PetscFunctionBegin;
3166   /* Create the factorization matrix */
3167   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3168   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3169   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3170   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3171   PetscCall(MatSetUp(B));
3172 
3173   PetscCall(PetscNew(&mumps));
3174   if (ftype == MAT_FACTOR_LU) {
3175     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3176     B->factortype            = MAT_FACTOR_LU;
3177     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3178     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3179     mumps->sym = 0;
3180     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3181   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3182 
3183   B->ops->view    = MatView_MUMPS;
3184   B->ops->getinfo = MatGetInfo_MUMPS;
3185 
3186   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3199 
3200   /* set solvertype */
3201   PetscCall(PetscFree(B->solvertype));
3202   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3203   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3204   if (size == 1) {
3205     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3206     B->canuseordering = PETSC_TRUE;
3207   }
3208   B->ops->destroy = MatDestroy_MUMPS;
3209   B->data         = (void *)mumps;
3210 
3211   *F               = B;
3212   mumps->id.job    = JOB_NULL;
3213   mumps->ICNTL_pre = NULL;
3214   mumps->CNTL_pre  = NULL;
3215   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3216   PetscFunctionReturn(PETSC_SUCCESS);
3217 }
3218 
3219 /* MatGetFactor for Seq and MPI SELL matrices */
3220 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3221 {
3222   Mat         B;
3223   Mat_MUMPS  *mumps;
3224   PetscBool   isSeqSELL;
3225   PetscMPIInt size;
3226 
3227   PetscFunctionBegin;
3228   /* Create the factorization matrix */
3229   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3230   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3231   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3232   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3233   PetscCall(MatSetUp(B));
3234 
3235   PetscCall(PetscNew(&mumps));
3236 
3237   B->ops->view    = MatView_MUMPS;
3238   B->ops->getinfo = MatGetInfo_MUMPS;
3239 
3240   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3241   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3242   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3243   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3244   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3245   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3246   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3247   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3248   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3251 
3252   if (ftype == MAT_FACTOR_LU) {
3253     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3254     B->factortype            = MAT_FACTOR_LU;
3255     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3256     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3257     mumps->sym = 0;
3258     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3259   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3260 
3261   /* set solvertype */
3262   PetscCall(PetscFree(B->solvertype));
3263   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3264   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3265   if (size == 1) {
3266     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3267     B->canuseordering = PETSC_TRUE;
3268   }
3269   B->ops->destroy = MatDestroy_MUMPS;
3270   B->data         = (void *)mumps;
3271 
3272   *F               = B;
3273   mumps->id.job    = JOB_NULL;
3274   mumps->ICNTL_pre = NULL;
3275   mumps->CNTL_pre  = NULL;
3276   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3277   PetscFunctionReturn(PETSC_SUCCESS);
3278 }
3279 
3280 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3281 {
3282   PetscFunctionBegin;
3283   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3284   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3285   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3286   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3287   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3288   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3289   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3290   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3291   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3292   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3293   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3294   PetscFunctionReturn(PETSC_SUCCESS);
3295 }
3296