1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_NULL 0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC 2
29 #define JOB_SOLVE 3
30 #define JOB_END -2
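/* A MUMPS instance is driven by setting id.job and then calling MUMPS via PetscMUMPS_c() (defined
   below). The typical life cycle, sketched from how this file uses the job codes:

     mumps->id.job = JOB_INIT;          initialize the instance
     mumps->id.job = JOB_FACTSYMBOLIC;  analysis / symbolic factorization
     mumps->id.job = JOB_FACTNUMERIC;   numeric factorization
     mumps->id.job = JOB_SOLVE;         one or more solves
     mumps->id.job = JOB_END;           free MUMPS internal data (see MatDestroy_MUMPS())
*/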
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34 #if defined(PETSC_USE_REAL_SINGLE)
35 #define MUMPS_c cmumps_c
36 #else
37 #define MUMPS_c zmumps_c
38 #endif
39 #else
40 #if defined(PETSC_USE_REAL_SINGLE)
41 #define MUMPS_c smumps_c
42 #else
43 #define MUMPS_c dmumps_c
44 #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48    the numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention of PetscMPIInt, PetscBLASInt, etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
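/* Illustrative consequence of this convention (cf. the Mat_MUMPS struct below): index arrays are
   32-bit while nonzero counts are 64-bit, e.g.

     PetscMUMPSInt *irn,*jcn;   triplet row/column indices
     PetscInt64    nnz;         number of nonzeros (may exceed 2^31-1)
*/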
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
55     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT             MPI_INT
64 #define PETSC_MUMPS_INT_MAX       2147483647
65 #define PETSC_MUMPS_INT_MIN       -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/column index or some other small integer */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(0);
76 }
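/* Minimal usage sketch (illustrative; this is exactly the pattern of the ConvertToTriples
   routines below): convert a PETSc index before handing it to MUMPS, erroring out in
   64-bit-indices builds if the value does not fit:

     PetscMUMPSInt row;
     PetscCall(PetscMUMPSIntCast(i+shift,&row));
*/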
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
80 {
81   PetscInt       myval;
82   PetscBool      myset;
83   PetscFunctionBegin;
84   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
87   if (set) *set = myset;
88   PetscFunctionReturn(0);
89 }
90 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
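/* Illustrative usage sketch (the option name and help text here are examples, not a definitive
   call site): within a PetscOptionsBegin()/PetscOptionsEnd() block, which provides the
   PetscOptionsObject the macro references, a MUMPS control parameter would be read as

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing","None",mumps->id.ICNTL(4),&icntl,&flg));
     if (flg) mumps->id.ICNTL(4) = icntl;
*/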
91 
92 /* If using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94 #define PetscMUMPS_c(mumps) \
95   do { \
96     if (mumps->use_petsc_omp_support) { \
97       if (mumps->is_omp_master) { \
98         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99         MUMPS_c(&mumps->id); \
100         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
101       } \
102       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
103       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is        \
104          process-specific, so we only Bcast info[1] (an error code) and leave the others alone (they     \
105          have no easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.   \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
107       */ \
108       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
110       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
111     } else { \
112       MUMPS_c(&mumps->id); \
113     } \
114   } while (0)
115 #else
116 #define PetscMUMPS_c(mumps) \
117   do { MUMPS_c(&mumps->id); } while (0)
118 #endif
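/* Typical call site for PetscMUMPS_c(), as used throughout this file: set id.job, call MUMPS,
   then check the MUMPS error code INFOG(1):

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
*/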
119 
120 /* define MumpsScalar */
121 #if defined(PETSC_USE_COMPLEX)
122 #if defined(PETSC_USE_REAL_SINGLE)
123 #define MumpsScalar mumps_complex
124 #else
125 #define MumpsScalar mumps_double_complex
126 #endif
127 #else
128 #define MumpsScalar PetscScalar
129 #endif
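/* MumpsScalar exists only for casts at the PETSc/MUMPS boundary; PETSc arrays are handed to MUMPS
   with a layout-compatible reinterpretation such as

     mumps->id.rhs = (MumpsScalar*)array;

   as done in MatSolve_MUMPS() below. */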
130 
131 /* macros so that indices match the (1-based) MUMPS documentation */
132 #define ICNTL(I) icntl[(I)-1]
133 #define CNTL(I) cntl[(I)-1]
134 #define INFOG(I) infog[(I)-1]
135 #define INFO(I) info[(I)-1]
136 #define RINFOG(I) rinfog[(I)-1]
137 #define RINFO(I) rinfo[(I)-1]
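/* Example: mumps->id.ICNTL(9) refers to the 1-based ICNTL(9) of the MUMPS manual, i.e., the C
   array entry mumps->id.icntl[8]. */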
138 
139 typedef struct Mat_MUMPS Mat_MUMPS;
140 struct Mat_MUMPS {
141 #if defined(PETSC_USE_COMPLEX)
142 #if defined(PETSC_USE_REAL_SINGLE)
143   CMUMPS_STRUC_C id;
144 #else
145   ZMUMPS_STRUC_C id;
146 #endif
147 #else
148 #if defined(PETSC_USE_REAL_SINGLE)
149   SMUMPS_STRUC_C id;
150 #else
151   DMUMPS_STRUC_C id;
152 #endif
153 #endif
154 
155   MatStructure   matstruc;
156   PetscMPIInt    myid,petsc_size;
157   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
158   PetscScalar    *val,*val_alloc;       /* For some matrices we can use their data array directly; for others we need a buffer, hence val_alloc. */
159   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
160   PetscMUMPSInt  sym;
161   MPI_Comm       mumps_comm;
162   PetscMUMPSInt  *ICNTL_pre;
163   PetscReal      *CNTL_pre;
164   PetscMUMPSInt  ICNTL9_pre;            /* ICNTL(9) value from the previous MatSolve(), used to detect changes */
165   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
166   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
167   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
168 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
169   PetscInt       *rhs_nrow,max_nrhs;
170   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
171   PetscScalar    *rhs_loc,*rhs_recvbuf;
172 #endif
173   Vec            b_seq,x_seq;
174   PetscInt       ninfo,*info;           /* which INFO to display */
175   PetscInt       sizeredrhs;
176   PetscScalar    *schur_sol;
177   PetscInt       schur_sizesol;
178   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
179   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
180   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
181 
182   /* stuff used by petsc/mumps OpenMP support*/
183   PetscBool      use_petsc_omp_support;
184   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller ensuring that blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
185   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is the PETSc matrix's communicator */
186   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
187   PetscMPIInt    tag,omp_comm_size;
188   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
189   MPI_Request    *reqs;
190 };
191 
192 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
193    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
194  */
195 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
196 {
197   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
198 
199   PetscFunctionBegin;
200 #if defined(PETSC_USE_64BIT_INDICES)
201   {
202     PetscInt i;
203     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
204       PetscCall(PetscFree(mumps->ia_alloc));
205       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
206       mumps->cur_ilen = nrow+1;
207     }
208     if (nnz > mumps->cur_jlen) {
209       PetscCall(PetscFree(mumps->ja_alloc));
210       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
211       mumps->cur_jlen = nnz;
212     }
213     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
214     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
215     *ia_mumps = mumps->ia_alloc;
216     *ja_mumps = mumps->ja_alloc;
217   }
218 #else
219   *ia_mumps = ia;
220   *ja_mumps = ja;
221 #endif
222   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
223   PetscFunctionReturn(0);
224 }
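/* Usage sketch (mirroring the sparse-RHS code in MatMatSolve_MUMPS() below): take the 1-based CSR
   of a SeqAIJ matrix and pass it to MUMPS as the sparse right-hand side:

     PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
*/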
225 
226 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
227 {
228   PetscFunctionBegin;
229   PetscCall(PetscFree(mumps->id.listvar_schur));
230   PetscCall(PetscFree(mumps->id.redrhs));
231   PetscCall(PetscFree(mumps->schur_sol));
232   mumps->id.size_schur = 0;
233   mumps->id.schur_lld  = 0;
234   mumps->id.ICNTL(19)  = 0;
235   PetscFunctionReturn(0);
236 }
237 
238 /* solve with rhs in mumps->id.redrhs and return in the same location */
239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
240 {
241   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
242   Mat                  S,B,X;
243   MatFactorSchurStatus schurstatus;
244   PetscInt             sizesol;
245 
246   PetscFunctionBegin;
247   PetscCall(MatFactorFactorizeSchurComplement(F));
248   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
249   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
250   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252   PetscCall(MatBindToCPU(B,S->boundtocpu));
253 #endif
254   switch (schurstatus) {
255   case MAT_FACTOR_SCHUR_FACTORED:
256     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
257     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259     PetscCall(MatBindToCPU(X,S->boundtocpu));
260 #endif
261     if (!mumps->id.ICNTL(9)) { /* transpose solve */
262       PetscCall(MatMatSolveTranspose(S,B,X));
263     } else {
264       PetscCall(MatMatSolve(S,B,X));
265     }
266     break;
267   case MAT_FACTOR_SCHUR_INVERTED:
268     sizesol = mumps->id.nrhs*mumps->id.size_schur;
269     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270       PetscCall(PetscFree(mumps->schur_sol));
271       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
272       mumps->schur_sizesol = sizesol;
273     }
274     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
275     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277     PetscCall(MatBindToCPU(X,S->boundtocpu));
278 #endif
279     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
280     if (!mumps->id.ICNTL(9)) { /* transpose solve */
281       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
282     } else {
283       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
284     }
285     PetscCall(MatProductSetFromOptions(X));
286     PetscCall(MatProductSymbolic(X));
287     PetscCall(MatProductNumeric(X));
288 
289     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
290     break;
291   default:
292     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
293   }
294   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
295   PetscCall(MatDestroy(&B));
296   PetscCall(MatDestroy(&X));
297   PetscFunctionReturn(0);
298 }
299 
300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
301 {
302   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
303 
304   PetscFunctionBegin;
305   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
306     PetscFunctionReturn(0);
307   }
308   if (!expansion) { /* prepare for the condensation step */
309     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
310     /* allocate MUMPS internal array to store reduced right-hand sides */
311     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
312       PetscCall(PetscFree(mumps->id.redrhs));
313       mumps->id.lredrhs = mumps->id.size_schur;
314       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
315       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
316     }
317     mumps->id.ICNTL(26) = 1; /* condensation phase */
318   } else { /* prepare for the expansion step */
319     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
320     PetscCall(MatMumpsSolveSchur_Private(F));
321     mumps->id.ICNTL(26) = 2; /* expansion phase */
322     PetscMUMPS_c(mumps);
323     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
324     /* restore defaults */
325     mumps->id.ICNTL(26) = -1;
326     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
327     if (mumps->id.nrhs > 1) {
328       PetscCall(PetscFree(mumps->id.redrhs));
329       mumps->id.lredrhs = 0;
330       mumps->sizeredrhs = 0;
331     }
332   }
333   PetscFunctionReturn(0);
334 }
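/* Putting the pieces together, a solve that goes through the Schur complement follows this
   sketch (cf. MatSolve_MUMPS() below):

     PetscCall(MatMumpsHandleSchur_Private(F,PETSC_FALSE));   ICNTL(26)=1: condense rhs onto the Schur variables
     mumps->id.job = JOB_SOLVE; PetscMUMPS_c(mumps);          interior solve, fills id.redrhs
     PetscCall(MatMumpsHandleSchur_Private(F,PETSC_TRUE));    solve with S, then ICNTL(26)=2: expand to the full solution
*/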
335 
336 /*
337   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
338
339   input:
340     A       - matrix in aij, baij, or sbaij format
341     shift   - 0: C-style output triples; 1: Fortran-style output triples.
342     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
343               MAT_REUSE_MATRIX:   only the values in the v array are updated
344   output:
345     nnz     - dim of r, c, and v (number of local nonzero entries of A)
346     r, c, v - row and column indices, matrix values (the matrix triples)
347
348   The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc() and are freed in MatDestroy_MUMPS()
349   with PetscFree(mumps->irn). This is not ideal: because v is only sometimes part of the mumps->irn allocation, that
350   PetscMalloc() cannot easily be replaced with a PetscMalloc3().
351 
352  */
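/* Worked example (illustrative): with shift=1 (Fortran-style), the 2x2 matrix
     [ 10  20 ]
     [  0  30 ]
   stored as SeqAIJ yields nnz = 3 and the triples
     r = {1,1,2}, c = {1,2,2}, v = {10,20,30}
*/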
353 
354 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
355 {
356   const PetscScalar *av;
357   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
358   PetscInt64        nz,rnz,i,j,k;
359   PetscMUMPSInt     *row,*col;
360   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatSeqAIJGetArrayRead(A,&av));
364   mumps->val = (PetscScalar*)av;
365   if (reuse == MAT_INITIAL_MATRIX) {
366     nz   = aa->nz;
367     ai   = aa->i;
368     aj   = aa->j;
369     PetscCall(PetscMalloc2(nz,&row,nz,&col));
370     for (i=k=0; i<M; i++) {
371       rnz = ai[i+1] - ai[i];
372       ajj = aj + ai[i];
373       for (j=0; j<rnz; j++) {
374         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
375         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
376         k++;
377       }
378     }
379     mumps->irn = row;
380     mumps->jcn = col;
381     mumps->nnz = nz;
382   }
383   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
384   PetscFunctionReturn(0);
385 }
386 
387 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
388 {
389   PetscInt64     nz,i,j,k,r;
390   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
391   PetscMUMPSInt  *row,*col;
392 
393   PetscFunctionBegin;
394   mumps->val = a->val;
395   if (reuse == MAT_INITIAL_MATRIX) {
396     nz   = a->sliidx[a->totalslices];
397     PetscCall(PetscMalloc2(nz,&row,nz,&col));
398     for (i=k=0; i<a->totalslices; i++) {
399       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
400         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
401       }
402     }
403     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
404     mumps->irn = row;
405     mumps->jcn = col;
406     mumps->nnz = nz;
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
412 {
413   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
414   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
415   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
416   PetscInt       bs;
417   PetscMUMPSInt  *row,*col;
418 
419   PetscFunctionBegin;
420   PetscCall(MatGetBlockSize(A,&bs));
421   M          = A->rmap->N/bs;
422   mumps->val = aa->a;
423   if (reuse == MAT_INITIAL_MATRIX) {
424     ai   = aa->i; aj = aa->j;
425     nz   = bs2*aa->nz;
426     PetscCall(PetscMalloc2(nz,&row,nz,&col));
427     for (i=0; i<M; i++) {
428       ajj = aj + ai[i];
429       rnz = ai[i+1] - ai[i];
430       for (k=0; k<rnz; k++) {
431         for (j=0; j<bs; j++) {
432           for (m=0; m<bs; m++) {
433             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
434             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
435             idx++;
436           }
437         }
438       }
439     }
440     mumps->irn = row;
441     mumps->jcn = col;
442     mumps->nnz = nz;
443   }
444   PetscFunctionReturn(0);
445 }
446 
447 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
448 {
449   const PetscInt *ai, *aj,*ajj;
450   PetscInt        bs;
451   PetscInt64      nz,rnz,i,j,k,m;
452   PetscMUMPSInt   *row,*col;
453   PetscScalar     *val;
454   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
455   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
456 #if defined(PETSC_USE_COMPLEX)
457   PetscBool       isset,hermitian;
458 #endif
459 
460   PetscFunctionBegin;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
463   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
464 #endif
465   ai   = aa->i;
466   aj   = aa->j;
467   PetscCall(MatGetBlockSize(A,&bs));
468   if (reuse == MAT_INITIAL_MATRIX) {
469     nz   = aa->nz;
470     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
471     if (bs>1) {
472       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
473       mumps->val = mumps->val_alloc;
474     } else {
475       mumps->val = aa->a;
476     }
477     mumps->irn = row;
478     mumps->jcn = col;
479   } else {
480     if (bs == 1) mumps->val = aa->a;
481     row = mumps->irn;
482     col = mumps->jcn;
483   }
484   val = mumps->val;
485 
486   nz = 0;
487   if (bs>1) {
488     for (i=0; i<mbs; i++) {
489       rnz = ai[i+1] - ai[i];
490       ajj = aj + ai[i];
491       for (j=0; j<rnz; j++) {
492         for (k=0; k<bs; k++) {
493           for (m=0; m<bs; m++) {
494             if (ajj[j]>i || k>=m) {
495               if (reuse == MAT_INITIAL_MATRIX) {
496                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
497                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
498               }
499               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
500             }
501           }
502         }
503       }
504     }
505   } else if (reuse == MAT_INITIAL_MATRIX) {
506     for (i=0; i<mbs; i++) {
507       rnz = ai[i+1] - ai[i];
508       ajj = aj + ai[i];
509       for (j=0; j<rnz; j++) {
510         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
511         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
512         nz++;
513       }
514     }
515     PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
516   }
517   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
518   PetscFunctionReturn(0);
519 }
520 
521 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
522 {
523   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
524   PetscInt64        nz,rnz,i,j;
525   const PetscScalar *av,*v1;
526   PetscScalar       *val;
527   PetscMUMPSInt     *row,*col;
528   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
529   PetscBool         missing;
530 #if defined(PETSC_USE_COMPLEX)
531   PetscBool         hermitian,isset;
532 #endif
533 
534   PetscFunctionBegin;
535 #if defined(PETSC_USE_COMPLEX)
536   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
537   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
538 #endif
539   PetscCall(MatSeqAIJGetArrayRead(A,&av));
540   ai    = aa->i; aj = aa->j;
541   adiag = aa->diag;
542   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
543   if (reuse == MAT_INITIAL_MATRIX) {
544     /* count nz in the upper triangular part of A */
545     nz = 0;
546     if (missing) {
547       for (i=0; i<M; i++) {
548         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
549           for (j=ai[i];j<ai[i+1];j++) {
550             if (aj[j] < i) continue;
551             nz++;
552           }
553         } else {
554           nz += ai[i+1] - adiag[i];
555         }
556       }
557     } else {
558       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
559     }
560     PetscCall(PetscMalloc2(nz,&row,nz,&col));
561     PetscCall(PetscMalloc1(nz,&val));
562     mumps->nnz = nz;
563     mumps->irn = row;
564     mumps->jcn = col;
565     mumps->val = mumps->val_alloc = val;
566 
567     nz = 0;
568     if (missing) {
569       for (i=0; i<M; i++) {
570         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
571           for (j=ai[i];j<ai[i+1];j++) {
572             if (aj[j] < i) continue;
573             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
574             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
575             val[nz] = av[j];
576             nz++;
577           }
578         } else {
579           rnz = ai[i+1] - adiag[i];
580           ajj = aj + adiag[i];
581           v1  = av + adiag[i];
582           for (j=0; j<rnz; j++) {
583             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
584             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
585             val[nz++] = v1[j];
586           }
587         }
588       }
589     } else {
590       for (i=0; i<M; i++) {
591         rnz = ai[i+1] - adiag[i];
592         ajj = aj + adiag[i];
593         v1  = av + adiag[i];
594         for (j=0; j<rnz; j++) {
595           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
596           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
597           val[nz++] = v1[j];
598         }
599       }
600     }
601   } else {
602     nz = 0;
603     val = mumps->val;
604     if (missing) {
605       for (i=0; i <M; i++) {
606         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
607           for (j=ai[i];j<ai[i+1];j++) {
608             if (aj[j] < i) continue;
609             val[nz++] = av[j];
610           }
611         } else {
612           rnz = ai[i+1] - adiag[i];
613           v1  = av + adiag[i];
614           for (j=0; j<rnz; j++) {
615             val[nz++] = v1[j];
616           }
617         }
618       }
619     } else {
620       for (i=0; i <M; i++) {
621         rnz = ai[i+1] - adiag[i];
622         v1  = av + adiag[i];
623         for (j=0; j<rnz; j++) {
624           val[nz++] = v1[j];
625         }
626       }
627     }
628   }
629   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
634 {
635   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
636   PetscInt          bs;
637   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
638   PetscMUMPSInt     *row,*col;
639   const PetscScalar *av,*bv,*v1,*v2;
640   PetscScalar       *val;
641   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
642   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
643   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
644   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
645 #if defined(PETSC_USE_COMPLEX)
646   PetscBool         hermitian,isset;
647 #endif
648 
649   PetscFunctionBegin;
650 #if defined(PETSC_USE_COMPLEX)
651   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
652   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
653 #endif
654   PetscCall(MatGetBlockSize(A,&bs));
655   rstart = A->rmap->rstart;
656   ai = aa->i;
657   aj = aa->j;
658   bi = bb->i;
659   bj = bb->j;
660   av = aa->a;
661   bv = bb->a;
662 
663   garray = mat->garray;
664 
665   if (reuse == MAT_INITIAL_MATRIX) {
666     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
667     PetscCall(PetscMalloc2(nz,&row,nz,&col));
668     PetscCall(PetscMalloc1(nz,&val));
669     /* cannot determine the exact mumps->nnz yet because of the SBAIJ storage */
670     mumps->irn = row;
671     mumps->jcn = col;
672     mumps->val = mumps->val_alloc = val;
673   } else {
674     val = mumps->val;
675   }
676 
677   jj = 0; irow = rstart;
678   for (i=0; i<mbs; i++) {
679     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
680     countA = ai[i+1] - ai[i];
681     countB = bi[i+1] - bi[i];
682     bjj    = bj + bi[i];
683     v1     = av + ai[i]*bs2;
684     v2     = bv + bi[i]*bs2;
685 
686     if (bs>1) {
687       /* A-part */
688       for (j=0; j<countA; j++) {
689         for (k=0; k<bs; k++) {
690           for (m=0; m<bs; m++) {
691             if (rstart + ajj[j]*bs>irow || k>=m) {
692               if (reuse == MAT_INITIAL_MATRIX) {
693                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
694                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
695               }
696               val[jj++] = v1[j*bs2 + m + k*bs];
697             }
698           }
699         }
700       }
701 
702       /* B-part */
703       for (j=0; j < countB; j++) {
704         for (k=0; k<bs; k++) {
705           for (m=0; m<bs; m++) {
706             if (reuse == MAT_INITIAL_MATRIX) {
707               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
708               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
709             }
710             val[jj++] = v2[j*bs2 + m + k*bs];
711           }
712         }
713       }
714     } else {
715       /* A-part */
716       for (j=0; j<countA; j++) {
717         if (reuse == MAT_INITIAL_MATRIX) {
718           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
719           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
720         }
721         val[jj++] = v1[j];
722       }
723 
724       /* B-part */
725       for (j=0; j < countB; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
728           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
729         }
730         val[jj++] = v2[j];
731       }
732     }
733     irow+=bs;
734   }
735   mumps->nnz = jj;
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
740 {
741   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
742   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
743   PetscMUMPSInt     *row,*col;
744   const PetscScalar *av, *bv,*v1,*v2;
745   PetscScalar       *val;
746   Mat               Ad,Ao;
747   Mat_SeqAIJ        *aa;
748   Mat_SeqAIJ        *bb;
749 
750   PetscFunctionBegin;
751   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
752   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
753   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
754 
755   aa = (Mat_SeqAIJ*)(Ad)->data;
756   bb = (Mat_SeqAIJ*)(Ao)->data;
757   ai = aa->i;
758   aj = aa->j;
759   bi = bb->i;
760   bj = bb->j;
761 
762   rstart = A->rmap->rstart;
763 
764   if (reuse == MAT_INITIAL_MATRIX) {
765     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
766     PetscCall(PetscMalloc2(nz,&row,nz,&col));
767     PetscCall(PetscMalloc1(nz,&val));
768     mumps->nnz = nz;
769     mumps->irn = row;
770     mumps->jcn = col;
771     mumps->val = mumps->val_alloc = val;
772   } else {
773     val = mumps->val;
774   }
775 
776   jj = 0; irow = rstart;
777   for (i=0; i<m; i++) {
778     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
779     countA = ai[i+1] - ai[i];
780     countB = bi[i+1] - bi[i];
781     bjj    = bj + bi[i];
782     v1     = av + ai[i];
783     v2     = bv + bi[i];
784 
785     /* A-part */
786     for (j=0; j<countA; j++) {
787       if (reuse == MAT_INITIAL_MATRIX) {
788         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
789         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
790       }
791       val[jj++] = v1[j];
792     }
793 
794     /* B-part */
795     for (j=0; j < countB; j++) {
796       if (reuse == MAT_INITIAL_MATRIX) {
797         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
798         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
799       }
800       val[jj++] = v2[j];
801     }
802     irow++;
803   }
804   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
805   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
806   PetscFunctionReturn(0);
807 }
808 
809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
810 {
811   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
812   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
813   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
814   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
815   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
816   const PetscInt    bs2=mat->bs2;
817   PetscInt          bs;
818   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
819   PetscMUMPSInt     *row,*col;
820   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
821   PetscScalar       *val;
822 
823   PetscFunctionBegin;
824   PetscCall(MatGetBlockSize(A,&bs));
825   if (reuse == MAT_INITIAL_MATRIX) {
826     nz   = bs2*(aa->nz + bb->nz);
827     PetscCall(PetscMalloc2(nz,&row,nz,&col));
828     PetscCall(PetscMalloc1(nz,&val));
829     mumps->nnz = nz;
830     mumps->irn = row;
831     mumps->jcn = col;
832     mumps->val = mumps->val_alloc = val;
833   } else {
834     val = mumps->val;
835   }
836 
837   jj = 0; irow = rstart;
838   for (i=0; i<mbs; i++) {
839     countA = ai[i+1] - ai[i];
840     countB = bi[i+1] - bi[i];
841     ajj    = aj + ai[i];
842     bjj    = bj + bi[i];
843     v1     = av + bs2*ai[i];
844     v2     = bv + bs2*bi[i];
845 
846     idx = 0;
847     /* A-part */
848     for (k=0; k<countA; k++) {
849       for (j=0; j<bs; j++) {
850         for (n=0; n<bs; n++) {
851           if (reuse == MAT_INITIAL_MATRIX) {
852             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
853             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
854           }
855           val[jj++] = v1[idx++];
856         }
857       }
858     }
859 
860     idx = 0;
861     /* B-part */
862     for (k=0; k<countB; k++) {
863       for (j=0; j<bs; j++) {
864         for (n=0; n<bs; n++) {
865           if (reuse == MAT_INITIAL_MATRIX) {
866             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
867             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
868           }
869           val[jj++] = v2[idx++];
870         }
871       }
872     }
873     irow += bs;
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
879 {
880   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
881   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
882   PetscMUMPSInt     *row,*col;
883   const PetscScalar *av, *bv,*v1,*v2;
884   PetscScalar       *val;
885   Mat               Ad,Ao;
886   Mat_SeqAIJ        *aa;
887   Mat_SeqAIJ        *bb;
888 #if defined(PETSC_USE_COMPLEX)
889   PetscBool         hermitian,isset;
890 #endif
891 
892   PetscFunctionBegin;
893 #if defined(PETSC_USE_COMPLEX)
894   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
895   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
896 #endif
897   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
898   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
899   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
900 
901   aa    = (Mat_SeqAIJ*)(Ad)->data;
902   bb    = (Mat_SeqAIJ*)(Ao)->data;
903   ai    = aa->i;
904   aj    = aa->j;
905   adiag = aa->diag;
906   bi    = bb->i;
907   bj    = bb->j;
908 
909   rstart = A->rmap->rstart;
910 
911   if (reuse == MAT_INITIAL_MATRIX) {
912     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
913     nzb = 0;    /* num of upper triangular entries in mat->B */
914     for (i=0; i<m; i++) {
915       nza   += (ai[i+1] - adiag[i]);
916       countB = bi[i+1] - bi[i];
917       bjj    = bj + bi[i];
918       for (j=0; j<countB; j++) {
919         if (garray[bjj[j]] > rstart) nzb++;
920       }
921     }
922 
923     nz   = nza + nzb; /* total nz of upper triangular part of mat */
924     PetscCall(PetscMalloc2(nz,&row,nz,&col));
925     PetscCall(PetscMalloc1(nz,&val));
926     mumps->nnz = nz;
927     mumps->irn = row;
928     mumps->jcn = col;
929     mumps->val = mumps->val_alloc = val;
930   } else {
931     val = mumps->val;
932   }
933 
934   jj = 0; irow = rstart;
935   for (i=0; i<m; i++) {
936     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
937     v1     = av + adiag[i];
938     countA = ai[i+1] - adiag[i];
939     countB = bi[i+1] - bi[i];
940     bjj    = bj + bi[i];
941     v2     = bv + bi[i];
942 
943     /* A-part */
944     for (j=0; j<countA; j++) {
945       if (reuse == MAT_INITIAL_MATRIX) {
946         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
947         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
948       }
949       val[jj++] = v1[j];
950     }
951 
952     /* B-part */
953     for (j=0; j < countB; j++) {
954       if (garray[bjj[j]] > rstart) {
955         if (reuse == MAT_INITIAL_MATRIX) {
956           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
957           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
958         }
959         val[jj++] = v2[j];
960       }
961     }
962     irow++;
963   }
964   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
965   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
966   PetscFunctionReturn(0);
967 }
968 
969 PetscErrorCode MatDestroy_MUMPS(Mat A)
970 {
971   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
972 
973   PetscFunctionBegin;
974   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
975   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
976   PetscCall(VecScatterDestroy(&mumps->scat_sol));
977   PetscCall(VecDestroy(&mumps->b_seq));
978   PetscCall(VecDestroy(&mumps->x_seq));
979   PetscCall(PetscFree(mumps->id.perm_in));
980   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
981   PetscCall(PetscFree(mumps->val_alloc));
982   PetscCall(PetscFree(mumps->info));
983   PetscCall(PetscFree(mumps->ICNTL_pre));
984   PetscCall(PetscFree(mumps->CNTL_pre));
985   PetscCall(MatMumpsResetSchur_Private(mumps));
986   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
987     mumps->id.job = JOB_END;
988     PetscMUMPS_c(mumps);
989     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
990     if (mumps->mumps_comm != MPI_COMM_NULL) {
991       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
992       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
993     }
994   }
995 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
996   if (mumps->use_petsc_omp_support) {
997     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
998     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
999     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
1000   }
1001 #endif
1002   PetscCall(PetscFree(mumps->ia_alloc));
1003   PetscCall(PetscFree(mumps->ja_alloc));
1004   PetscCall(PetscFree(mumps->recvcount));
1005   PetscCall(PetscFree(mumps->reqs));
1006   PetscCall(PetscFree(mumps->irhs_loc));
1007   PetscCall(PetscFree(A->data));
1008 
1009   /* clear composed functions */
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1013   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1014   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1015   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1016   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1027 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1028 {
1029   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1030   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1031   PetscInt           i,m,M,rstart;
1032 
1033   PetscFunctionBegin;
1034   PetscCall(MatGetSize(A,&M,NULL));
1035   PetscCall(MatGetLocalSize(A,&m,NULL));
1036   PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1037   if (ompsize == 1) {
1038     if (!mumps->irhs_loc) {
1039       mumps->nloc_rhs = m;
1040       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1041       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1042       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1043     }
1044     mumps->id.rhs_loc = (MumpsScalar*)array;
1045   } else {
1046   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1047     const PetscInt  *ranges;
1048     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1049     MPI_Group       petsc_group,omp_group;
1050     PetscScalar     *recvbuf=NULL;
1051 
1052     if (mumps->is_omp_master) {
1053       /* Lazily initialize the omp stuff for distributed rhs */
1054       if (!mumps->irhs_loc) {
1055         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1056         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1057         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1058         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1059         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1060         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1061 
1062         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1063         mumps->nloc_rhs = 0;
1064         PetscCall(MatGetOwnershipRanges(A,&ranges));
1065         for (j=0; j<ompsize; j++) {
1066           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1067           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1068         }
1069         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1070         for (j=k=0; j<ompsize; j++) {
1071           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1072         }
1073 
1074         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1075         PetscCallMPI(MPI_Group_free(&petsc_group));
1076         PetscCallMPI(MPI_Group_free(&omp_group));
1077       }
1078 
1079       /* Realloc buffers when current nrhs is bigger than what we have met */
1080       if (nrhs > mumps->max_nrhs) {
1081         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1082         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1083         mumps->max_nrhs = nrhs;
1084       }
1085 
1086       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1087       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1088       mumps->rhs_disps[0] = 0;
1089       for (j=1; j<ompsize; j++) {
1090         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1091         PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1092       }
1093       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1094     }
1095 
1096     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1097     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1098 
1099     if (mumps->is_omp_master) {
1100       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1101         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1102         for (j=0; j<ompsize; j++) {
1103           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1104           dst = dstbase;
1105           for (i=0; i<nrhs; i++) {
1106             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1107             src += mumps->rhs_nrow[j];
1108             dst += mumps->nloc_rhs;
1109           }
1110           dstbase += mumps->rhs_nrow[j];
1111         }
1112       }
1113       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1114     }
1115   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1116   }
1117   mumps->id.nrhs     = nrhs;
1118   mumps->id.nloc_rhs = mumps->nloc_rhs;
1119   mumps->id.lrhs_loc = mumps->nloc_rhs;
1120   mumps->id.irhs_loc = mumps->irhs_loc;
1121   PetscFunctionReturn(0);
1122 }
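/* Layout sketch for the OpenMP branch above (illustrative numbers; rXcY denotes an entry of rank
   X's rows for rhs column Y): with ompsize=2, nrhs=2 and rhs_nrow = {2,1}, MPI_Gatherv packs
   rhs_recvbuf rank-block by rank-block,

     rhs_recvbuf = [ r0c0 r0c0 r0c1 r0c1 | r1c0 r1c1 ]

   and the re-arrangement loop interleaves the ranks' rows to build the column-major rhs_loc
   (nloc_rhs = 3) that MUMPS expects:

     rhs_loc = [ r0c0 r0c0 r1c0 | r0c1 r0c1 r1c1 ]
*/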
1123 
1124 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1125 {
1126   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1127   const PetscScalar  *rarray = NULL;
1128   PetscScalar        *array;
1129   IS                 is_iden,is_petsc;
1130   PetscInt           i;
1131   PetscBool          second_solve = PETSC_FALSE;
1132   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1133 
1134   PetscFunctionBegin;
1135   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1136   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1137 
1138   if (A->factorerrortype) {
1139     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1140     PetscCall(VecSetInf(x));
1141     PetscFunctionReturn(0);
1142   }
1143 
1144   mumps->id.nrhs = 1;
1145   if (mumps->petsc_size > 1) {
1146     if (mumps->ICNTL20 == 10) {
1147       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1148       PetscCall(VecGetArrayRead(b,&rarray));
1149       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1150     } else {
1151       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1152       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1153       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1154       if (!mumps->myid) {
1155         PetscCall(VecGetArray(mumps->b_seq,&array));
1156         mumps->id.rhs = (MumpsScalar*)array;
1157       }
1158     }
1159   } else {  /* petsc_size == 1 */
1160     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1161     PetscCall(VecCopy(b,x));
1162     PetscCall(VecGetArray(x,&array));
1163     mumps->id.rhs = (MumpsScalar*)array;
1164   }
1165 
1166   /*
1167      handle condensation step of Schur complement (if any)
1168      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1169      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1170      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1171      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1172   */
1173   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1174     PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1175     second_solve = PETSC_TRUE;
1176     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1177   }
1178   /* solve phase */
1179   /*-------------*/
1180   mumps->id.job = JOB_SOLVE;
1181   PetscMUMPS_c(mumps);
1182   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1183 
1184   /* handle expansion step of Schur complement (if any) */
1185   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1186 
1187   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1188     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1189       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though its size lsol_loc does not), so recreate scat_sol */
1190       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1191     }
1192     if (!mumps->scat_sol) { /* create scatter scat_sol */
1193       PetscInt *isol2_loc=NULL;
1194       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1195       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1196       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1197       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1198       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1199       PetscCall(ISDestroy(&is_iden));
1200       PetscCall(ISDestroy(&is_petsc));
1201       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1202     }
1203 
1204     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1205     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1206   }
1207 
1208   if (mumps->petsc_size > 1) {
1209     if (mumps->ICNTL20 == 10) {
1210       PetscCall(VecRestoreArrayRead(b,&rarray));
1211     } else if (!mumps->myid) {
1212       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1213     }
1214   } else PetscCall(VecRestoreArray(x,&array));
1215 
1216   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1221 {
1222   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1223 
1224   PetscFunctionBegin;
1225   mumps->id.ICNTL(9) = 0;
1226   PetscCall(MatSolve_MUMPS(A,b,x));
1227   mumps->id.ICNTL(9) = 1;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1232 {
1233   Mat               Bt = NULL;
1234   PetscBool         denseX,denseB,flg,flgT;
1235   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1236   PetscInt          i,nrhs,M;
1237   PetscScalar       *array;
1238   const PetscScalar *rbray;
1239   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1240   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1241   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1242   IS                is_to,is_from;
1243   PetscInt          k,proc,j,m,myrstart;
1244   const PetscInt    *rstart;
1245   Vec               v_mpi,msol_loc;
1246   VecScatter        scat_sol;
1247   Vec               b_seq;
1248   VecScatter        scat_rhs;
1249   PetscScalar       *aa;
1250   PetscInt          spnr,*ia,*ja;
1251   Mat_MPIAIJ        *b = NULL;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1255   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1256 
1257   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1258   if (denseB) {
1259     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1260     mumps->id.ICNTL(20)= 0; /* dense RHS */
1261   } else { /* sparse B */
1262     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1263     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1264     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1265                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1266       PetscCall(MatTransposeGetMat(B,&Bt));
1267     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1268     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1269   }
1270 
1271   PetscCall(MatGetSize(B,&M,&nrhs));
1272   mumps->id.nrhs = nrhs;
1273   mumps->id.lrhs = M;
1274   mumps->id.rhs  = NULL;
1275 
1276   if (mumps->petsc_size == 1) {
1277     PetscScalar *aa;
1278     PetscInt    spnr,*ia,*ja;
1279     PetscBool   second_solve = PETSC_FALSE;
1280 
1281     PetscCall(MatDenseGetArray(X,&array));
1282     mumps->id.rhs = (MumpsScalar*)array;
1283 
1284     if (denseB) {
1285       /* copy B to X */
1286       PetscCall(MatDenseGetArrayRead(B,&rbray));
1287       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1288       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1289     } else { /* sparse B */
1290       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1291       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1292       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1293       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1294       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1295     }
1296     /* handle condensation step of Schur complement (if any) */
1297     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1298       second_solve = PETSC_TRUE;
1299       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1300     }
1301     /* solve phase */
1302     /*-------------*/
1303     mumps->id.job = JOB_SOLVE;
1304     PetscMUMPS_c(mumps);
1305     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1306 
1307     /* handle expansion step of Schur complement (if any) */
1308     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1309     if (!denseB) { /* sparse B */
1310       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1311       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1312       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1313     }
1314     PetscCall(MatDenseRestoreArray(X,&array));
1315     PetscFunctionReturn(0);
1316   }
1317 
1318   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1319   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1320 
1321   /* create msol_loc to hold mumps local solution */
1322   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1323   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1324 
1325   lsol_loc  = mumps->id.lsol_loc;
1326   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1327   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1328   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1329   mumps->id.isol_loc = isol_loc;
1330 
1331   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1332 
1333   if (denseB) {
1334     if (mumps->ICNTL20 == 10) {
1335       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1336       PetscCall(MatDenseGetArrayRead(B,&rbray));
1337       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1338       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1339       PetscCall(MatGetLocalSize(B,&m,NULL));
1340       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1341     } else {
1342       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1343       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created here cannot be implemented as an
1344         MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to
1345         gather B to rank 0. Then on rank 0, re-arrange B into the desired order, which is a local operation.
1346       */
1347 
1348       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1349       /* wrap dense rhs matrix B into a vector v_mpi */
1350       PetscCall(MatGetLocalSize(B,&m,NULL));
1351       PetscCall(MatDenseGetArray(B,&bray));
1352       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1353       PetscCall(MatDenseRestoreArray(B,&bray));
1354 
1355       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1356       if (!mumps->myid) {
1357         PetscInt *idx;
1358         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1359         PetscCall(PetscMalloc1(nrhs*M,&idx));
1360         PetscCall(MatGetOwnershipRanges(B,&rstart));
1361         k = 0;
1362         for (proc=0; proc<mumps->petsc_size; proc++) {
1363           for (j=0; j<nrhs; j++) {
1364             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1365           }
1366         }
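        /* Example: M=3 rows split as {0,1} on rank 0 and {2} on rank 1 with nrhs=2 gives idx = [0,1,3,4,2,5],
           i.e. v_mpi entry k (rank-major, column-within-rank order) goes to b_seq[idx[k]], the global
           column-major layout MUMPS expects for a centralized rhs */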
1367 
1368         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1369         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1370         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1371       } else {
1372         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1373         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1374         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1375       }
1376       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1377       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1378       PetscCall(ISDestroy(&is_to));
1379       PetscCall(ISDestroy(&is_from));
1380       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1381 
1382       if (!mumps->myid) { /* define rhs on the host */
1383         PetscCall(VecGetArray(b_seq,&bray));
1384         mumps->id.rhs = (MumpsScalar*)bray;
1385         PetscCall(VecRestoreArray(b_seq,&bray));
1386       }
1387     }
1388   } else { /* sparse B */
1389     b = (Mat_MPIAIJ*)Bt->data;
1390 
1391     /* wrap dense X into a vector v_mpi */
1392     PetscCall(MatGetLocalSize(X,&m,NULL));
1393     PetscCall(MatDenseGetArray(X,&bray));
1394     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1395     PetscCall(MatDenseRestoreArray(X,&bray));
1396 
1397     if (!mumps->myid) {
1398       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1399       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1400       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1401       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1402       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1403     } else {
1404       mumps->id.irhs_ptr    = NULL;
1405       mumps->id.irhs_sparse = NULL;
1406       mumps->id.nz_rhs      = 0;
1407       mumps->id.rhs_sparse  = NULL;
1408     }
1409   }
1410 
1411   /* solve phase */
1412   /*-------------*/
1413   mumps->id.job = JOB_SOLVE;
1414   PetscMUMPS_c(mumps);
1415   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1416 
1417   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1418   PetscCall(MatDenseGetArray(X,&array));
1419   PetscCall(VecPlaceArray(v_mpi,array));
1420 
1421   /* create scatter scat_sol */
1422   PetscCall(MatGetOwnershipRanges(X,&rstart));
1423   /* idxx[]: for each entry of the mumps solution, its target index in v_mpi (and hence in X) */
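  /* a mumps solution entry with global row r owned by proc (rstart[proc] <= r < rstart[proc+1], with
     m = rstart[proc+1]-rstart[proc]) maps, for column j, to global v_mpi index
     rstart[proc]*nrhs + (r-rstart[proc]) + j*m; the loop below records these targets in idxx[] */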
1424 
1425   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1426   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1427   for (i=0; i<lsol_loc; i++) {
1428     isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th RHS */
1429 
1430     for (proc=0; proc<mumps->petsc_size; proc++) {
1431       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1432         myrstart = rstart[proc];
1433         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1434         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1435         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1436         break;
1437       }
1438     }
1439 
1440     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1441   }
1442   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1443   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1444   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1445   PetscCall(ISDestroy(&is_from));
1446   PetscCall(ISDestroy(&is_to));
1447   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1448   PetscCall(MatDenseRestoreArray(X,&array));
1449 
1450   /* free spaces */
1451   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1452   mumps->id.isol_loc = isol_loc_save;
1453 
1454   PetscCall(PetscFree2(sol_loc,isol_loc));
1455   PetscCall(PetscFree(idxx));
1456   PetscCall(VecDestroy(&msol_loc));
1457   PetscCall(VecDestroy(&v_mpi));
1458   if (!denseB) {
1459     if (!mumps->myid) {
1460       b = (Mat_MPIAIJ*)Bt->data;
1461       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1462       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1463       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1464     }
1465   } else {
1466     if (mumps->ICNTL20 == 0) {
1467       PetscCall(VecDestroy(&b_seq));
1468       PetscCall(VecScatterDestroy(&scat_rhs));
1469     }
1470   }
1471   PetscCall(VecScatterDestroy(&scat_sol));
1472   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1477 {
1478   PetscBool      flg;
1479   Mat            B;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1483   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1484 
1485   /* Create B=Bt^T that uses Bt's data structure */
1486   PetscCall(MatCreateTranspose(Bt,&B));
1487 
1488   PetscCall(MatMatSolve_MUMPS(A,B,X));
1489   PetscCall(MatDestroy(&B));
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #if !defined(PETSC_USE_COMPLEX)
1494 /*
1495   input:
1496    F:        numeric factor
1497   output:
1498    nneg:     total number of negative pivots
1499    nzero:    total number of zero pivots
1500    npos:     (global dimension of F) - nneg - nzero
1501 */
1502 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1503 {
1504   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1505   PetscMPIInt    size;
1506 
1507   PetscFunctionBegin;
1508   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1509   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1510   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1511 
1512   if (nneg) *nneg = mumps->id.INFOG(12);
1513   if (nzero || npos) {
1514     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1515     if (nzero) *nzero = mumps->id.INFOG(28);
1516     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 #endif
1521 
1522 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1523 {
1524   PetscInt       i,nreqs;
1525   PetscMUMPSInt  *irn,*jcn;
1526   PetscMPIInt    count;
1527   PetscInt64     totnnz,remain;
1528   const PetscInt osize=mumps->omp_comm_size;
1529   PetscScalar    *val;
1530 
1531   PetscFunctionBegin;
1532   if (osize > 1) {
1533     if (reuse == MAT_INITIAL_MATRIX) {
1534       /* master first gathers counts of nonzeros to receive */
1535       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1536       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
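      /* recvcount is allocated only on the master; MPI_Gather ignores its recv arguments on non-root ranks */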
1537 
1538       /* Then each rank computes the number of sends/recvs it participates in */
1539       if (mumps->is_omp_master) {
1540         /* Start from 1 since self communication is not done via MPI */
1541         nreqs = 0;
1542         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1543       } else {
1544         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1545       }
1546       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1547 
1548       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1549          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1550          might be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1551          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1552        */
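      /* For example, with PETSC_MPI_INT_MAX = 2^31-1 and recvcount[i] = 5e9, the data from rank i arrives in
         ceil(5e9/(2^31-1)) = 3 slices, i.e. three irn/jcn/val triples and hence 9 MPI requests */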
1553       nreqs = 0; /* counter for actual send/recvs */
1554       if (mumps->is_omp_master) {
1555         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1556         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1557         PetscCall(PetscMalloc1(totnnz,&val));
1558 
1559         /* Self communication */
1560         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1561         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1562         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1563 
1564         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1565         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1566         PetscCall(PetscFree(mumps->val_alloc));
1567         mumps->nnz = totnnz;
1568         mumps->irn = irn;
1569         mumps->jcn = jcn;
1570         mumps->val = mumps->val_alloc = val;
1571 
1572         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1573         jcn += mumps->recvcount[0];
1574         val += mumps->recvcount[0];
1575 
1576         /* Remote communication */
1577         for (i=1; i<osize; i++) {
1578           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1579           remain = mumps->recvcount[i] - count;
1580           while (count>0) {
1581             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1582             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1583             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1584             irn    += count;
1585             jcn    += count;
1586             val    += count;
1587             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1588             remain -= count;
1589           }
1590         }
1591       } else {
1592         irn    = mumps->irn;
1593         jcn    = mumps->jcn;
1594         val    = mumps->val;
1595         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1596         remain = mumps->nnz - count;
1597         while (count>0) {
1598           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1599           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1600           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1601           irn    += count;
1602           jcn    += count;
1603           val    += count;
1604           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1605           remain -= count;
1606         }
1607       }
1608     } else {
1609       nreqs = 0;
1610       if (mumps->is_omp_master) {
1611         val = mumps->val + mumps->recvcount[0];
1612         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1613           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1614           remain = mumps->recvcount[i] - count;
1615           while (count>0) {
1616             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1617             val    += count;
1618             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1619             remain -= count;
1620           }
1621         }
1622       } else {
1623         val    = mumps->val;
1624         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1625         remain = mumps->nnz - count;
1626         while (count>0) {
1627           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1628           val    += count;
1629           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1630           remain -= count;
1631         }
1632       }
1633     }
1634     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1635     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1636   }
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1641 {
1642   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1643   PetscBool      isMPIAIJ;
1644 
1645   PetscFunctionBegin;
1646   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* allow the benign empty-matrix case (INFOG(1)=-16 with INFO(2)=0) to proceed */
1647     if (mumps->id.INFOG(1) == -6) {
1648       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1649     }
1650     PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1651     PetscFunctionReturn(0);
1652   }
1653 
1654   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1655   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1656 
1657   /* numerical factorization phase */
1658   /*-------------------------------*/
1659   mumps->id.job = JOB_FACTNUMERIC;
1660   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1661     if (!mumps->myid) {
1662       mumps->id.a = (MumpsScalar*)mumps->val;
1663     }
1664   } else {
1665     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1666   }
1667   PetscMUMPS_c(mumps);
1668   if (mumps->id.INFOG(1) < 0) {
1669     if (A->erroriffailure) {
1670       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1671     } else {
1672       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1673         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1675       } else if (mumps->id.INFOG(1) == -13) {
1676         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1678       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1679         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1680         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1681       } else {
1682         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1683         F->factorerrortype = MAT_FACTOR_OTHER;
1684       }
1685     }
1686   }
1687   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"mumps->id.ICNTL(16) = %d, which should not be positive",mumps->id.ICNTL(16));
1688 
1689   F->assembled    = PETSC_TRUE;
1690 
1691   if (F->schur) { /* reset Schur status to unfactored */
1692 #if defined(PETSC_HAVE_CUDA)
1693     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1694 #endif
1695     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1696       mumps->id.ICNTL(19) = 2;
1697       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1698     }
1699     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1700   }
1701 
1702   /* ensure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl() is always consistent */
1703   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1704 
1705   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; /* ranks that do not call MUMPS hold no part of the distributed solution */
1706   if (mumps->petsc_size > 1) {
1707     PetscInt    lsol_loc;
1708     PetscScalar *sol_loc;
1709 
1710     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1711 
1712     /* distributed solution; Create x_seq=sol_loc for repeated use */
1713     if (mumps->x_seq) {
1714       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1715       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1716       PetscCall(VecDestroy(&mumps->x_seq));
1717     }
1718     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1719     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1720     mumps->id.lsol_loc = lsol_loc;
1721     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1722     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1723   }
1724   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /* Sets MUMPS options from the options database */
1729 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1730 {
1731   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1732   PetscMUMPSInt  icntl=0,size,*listvar_schur;
1733   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1734   PetscBool      flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1);
1735   MumpsScalar    *arr;
1736 
1737   PetscFunctionBegin;
1738   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat");
1739   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1740     PetscInt nthreads = 0;
1741     PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1742     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1743 
1744     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1745     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1746     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1747 
1748     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support));
1749     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
1750     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1751     PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1752     if (mumps->use_petsc_omp_support) {
1753       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1754       PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1755 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1756       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1757       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1758 #endif
1759     } else {
1760       mumps->omp_comm      = PETSC_COMM_SELF;
1761       mumps->mumps_comm    = mumps->petsc_comm;
1762       mumps->is_omp_master = PETSC_TRUE;
1763     }
1764     PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1765     mumps->reqs = NULL;
1766     mumps->tag  = 0;
1767 
1768     if (mumps->mumps_comm != MPI_COMM_NULL) {
1769       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1770         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1771         MPI_Comm comm;
1772         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm));
1773         mumps->mumps_comm = comm;
1774       } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm));
1775     }
1776 
1777     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1778     mumps->id.job = JOB_INIT;
1779     mumps->id.par = 1;  /* host participates in factorization and solve */
1780     mumps->id.sym = mumps->sym;
1781 
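    /* save the Schur settings, since the JOB_INIT call below (re)initializes mumps->id */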
1782     size = mumps->id.size_schur;
1783     arr = mumps->id.schur;
1784     listvar_schur = mumps->id.listvar_schur;
1785     PetscMUMPS_c(mumps);
1786     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1787     /* restore cached ICNTL and CNTL values */
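    /* cache layout: *_pre[0] holds the number of cached entries, followed by (index,value) pairs stored at
       *_pre[1+2*k] and *_pre[2+2*k] for k = 0,...,*_pre[0]-1 */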
1788     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl];
1789     for (icntl = 0; icntl < nCNTL_pre; ++icntl)  mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl];
1790     PetscCall(PetscFree(mumps->ICNTL_pre));
1791     PetscCall(PetscFree(mumps->CNTL_pre));
1792 
1793     if (schur) {
1794       mumps->id.size_schur = size;
1795       mumps->id.schur_lld  = size;
1796       mumps->id.schur = arr;
1797       mumps->id.listvar_schur = listvar_schur;
1798       if (mumps->petsc_size > 1) {
1799         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1800 
1801         mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
1802         gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1803         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
1804         PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
1805       } else {
1806         if (F->factortype == MAT_FACTOR_LU) {
1807           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1808         } else {
1809           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1810         }
1811       }
1812       mumps->id.ICNTL(26) = -1;
1813     }
1814 
1815     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1816        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1817      */
1818     PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1819     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1820 
1821     mumps->scat_rhs = NULL;
1822     mumps->scat_sol = NULL;
1823 
1824     /* set PETSc-MUMPS default options - override MUMPS default */
1825     mumps->id.ICNTL(3) = 0;
1826     mumps->id.ICNTL(4) = 0;
1827     if (mumps->petsc_size == 1) {
1828       mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1829       mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1830     } else {
1831       mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1832       mumps->id.ICNTL(21) = 1;   /* distributed solution */
1833     }
1834   }
1835   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1836   if (flg) mumps->id.ICNTL(1) = icntl;
1837   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1838   if (flg) mumps->id.ICNTL(2) = icntl;
1839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1840   if (flg) mumps->id.ICNTL(3) = icntl;
1841 
1842   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1843   if (flg) mumps->id.ICNTL(4) = icntl;
1844   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default ICNTL(3) = 6 */
1845 
1846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1847   if (flg) mumps->id.ICNTL(6) = icntl;
1848 
1849   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1850   if (flg) {
1851     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1852     mumps->id.ICNTL(7) = icntl;
1853   }
1854 
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1856   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1857   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1858   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1859   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1860   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1861   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1862   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
1863   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
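  /* a negative ICNTL(15) asks MUMPS (as we read its documentation) to compress the input matrix with a
     regular block structure of size -ICNTL(15), derived here from the PETSc block size */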
1864   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1865   if (flg) {
1866     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1867     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1868   }
1869   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1870   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1871     PetscCall(MatDestroy(&F->schur));
1872     PetscCall(MatMumpsResetSchur_Private(mumps));
1873   }
1874 
1875   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1876      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1877      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1878      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1879      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1880      In short, we could not use distributed RHS with MPICH until v4.0b1.
1881    */
1882 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1883   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1884 #else
1885   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1886 #endif
1887   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1888   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1889 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1890   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1891 #endif
1892   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1893 
1894   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1895   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1896   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1897   if (mumps->id.ICNTL(24)) {
1898     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1899   }
1900 
1901   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1902   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix has been computed","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1903   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1904   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1906   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1907   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1908   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1910   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1911   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1913 
1914   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1915   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1916   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1917   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1918   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1919   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1920 
1921   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1922 
1923   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1924   if (ninfo) {
1925     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1926     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1927     mumps->ninfo = ninfo;
1928     for (i=0; i<ninfo; i++) {
1929       PetscCheck(info[i] >= 1 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1930       mumps->info[i] = info[i];
1931     }
1932   }
1933   PetscOptionsEnd();
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1938 {
1939   PetscFunctionBegin;
1940   if (mumps->id.INFOG(1) < 0) {
1941     if (A->erroriffailure) {
1942       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1943     } else {
1944       if (mumps->id.INFOG(1) == -6) {
1945         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1946         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1947       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1948         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1949         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1950       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 reports N out of range and INFO(2) holds N, so N=0 means an empty matrix */
1951         PetscCall(PetscInfo(F,"Empty matrix\n"));
1952       } else {
1953         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1954         F->factorerrortype = MAT_FACTOR_OTHER;
1955       }
1956     }
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1962 {
1963   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1964   Vec            b;
1965   const PetscInt M = A->rmap->N;
1966 
1967   PetscFunctionBegin;
1968   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1969     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1970     PetscFunctionReturn(0);
1971   }
1972 
1973   /* Set MUMPS options from the options database */
1974   PetscCall(MatSetFromOptions_MUMPS(F,A));
1975 
1976   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1977   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1978 
1979   /* analysis phase */
1980   /*----------------*/
1981   mumps->id.job = JOB_FACTSYMBOLIC;
1982   mumps->id.n   = M;
1983   switch (mumps->id.ICNTL(18)) {
1984   case 0:  /* centralized assembled matrix input */
1985     if (!mumps->myid) {
1986       mumps->id.nnz = mumps->nnz;
1987       mumps->id.irn = mumps->irn;
1988       mumps->id.jcn = mumps->jcn;
1989       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1990       if (r) {
1991         mumps->id.ICNTL(7) = 1;
1992         if (!mumps->myid) {
1993           const PetscInt *idx;
1994           PetscInt       i;
1995 
1996           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1997           PetscCall(ISGetIndices(r,&idx));
1998           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1999           PetscCall(ISRestoreIndices(r,&idx));
2000         }
2001       }
2002     }
2003     break;
2004   case 3:  /* distributed assembled matrix input (size>1) */
2005     mumps->id.nnz_loc = mumps->nnz;
2006     mumps->id.irn_loc = mumps->irn;
2007     mumps->id.jcn_loc = mumps->jcn;
2008     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
2009     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2010       PetscCall(MatCreateVecs(A,NULL,&b));
2011       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2012       PetscCall(VecDestroy(&b));
2013     }
2014     break;
2015   }
2016   PetscMUMPS_c(mumps);
2017   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2018 
2019   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2020   F->ops->solve           = MatSolve_MUMPS;
2021   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2022   F->ops->matsolve        = MatMatSolve_MUMPS;
2023   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2024 
2025   mumps->matstruc = SAME_NONZERO_PATTERN;
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 /* Note the Petsc r and c permutations are ignored */
2030 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2031 {
2032   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2033   Vec            b;
2034   const PetscInt M = A->rmap->N;
2035 
2036   PetscFunctionBegin;
2037   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2038     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2039     PetscFunctionReturn(0);
2040   }
2041 
2042   /* Set MUMPS options from the options database */
2043   PetscCall(MatSetFromOptions_MUMPS(F,A));
2044 
2045   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2046   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2047 
2048   /* analysis phase */
2049   /*----------------*/
2050   mumps->id.job = JOB_FACTSYMBOLIC;
2051   mumps->id.n   = M;
2052   switch (mumps->id.ICNTL(18)) {
2053   case 0:  /* centralized assembled matrix input */
2054     if (!mumps->myid) {
2055       mumps->id.nnz = mumps->nnz;
2056       mumps->id.irn = mumps->irn;
2057       mumps->id.jcn = mumps->jcn;
2058       if (mumps->id.ICNTL(6)>1) {
2059         mumps->id.a = (MumpsScalar*)mumps->val;
2060       }
2061     }
2062     break;
2063   case 3:  /* distributed assembled matrix input (size>1) */
2064     mumps->id.nnz_loc = mumps->nnz;
2065     mumps->id.irn_loc = mumps->irn;
2066     mumps->id.jcn_loc = mumps->jcn;
2067     if (mumps->id.ICNTL(6)>1) {
2068       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2069     }
2070     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2071       PetscCall(MatCreateVecs(A,NULL,&b));
2072       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2073       PetscCall(VecDestroy(&b));
2074     }
2075     break;
2076   }
2077   PetscMUMPS_c(mumps);
2078   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2079 
2080   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2081   F->ops->solve           = MatSolve_MUMPS;
2082   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2083 
2084   mumps->matstruc = SAME_NONZERO_PATTERN;
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 /* Note the Petsc r permutation and factor info are ignored */
2089 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2090 {
2091   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2092   Vec            b;
2093   const PetscInt M = A->rmap->N;
2094 
2095   PetscFunctionBegin;
2096   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2097     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2098     PetscFunctionReturn(0);
2099   }
2100 
2101   /* Set MUMPS options from the options database */
2102   PetscCall(MatSetFromOptions_MUMPS(F,A));
2103 
2104   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2105   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2106 
2107   /* analysis phase */
2108   /*----------------*/
2109   mumps->id.job = JOB_FACTSYMBOLIC;
2110   mumps->id.n   = M;
2111   switch (mumps->id.ICNTL(18)) {
2112   case 0:  /* centralized assembled matrix input */
2113     if (!mumps->myid) {
2114       mumps->id.nnz = mumps->nnz;
2115       mumps->id.irn = mumps->irn;
2116       mumps->id.jcn = mumps->jcn;
2117       if (mumps->id.ICNTL(6)>1) {
2118         mumps->id.a = (MumpsScalar*)mumps->val;
2119       }
2120     }
2121     break;
2122   case 3:  /* distributed assembled matrix input (size>1) */
2123     mumps->id.nnz_loc = mumps->nnz;
2124     mumps->id.irn_loc = mumps->irn;
2125     mumps->id.jcn_loc = mumps->jcn;
2126     if (mumps->id.ICNTL(6)>1) {
2127       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2128     }
2129     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2130       PetscCall(MatCreateVecs(A,NULL,&b));
2131       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2132       PetscCall(VecDestroy(&b));
2133     }
2134     break;
2135   }
2136   PetscMUMPS_c(mumps);
2137   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2138 
2139   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2140   F->ops->solve                 = MatSolve_MUMPS;
2141   F->ops->solvetranspose        = MatSolve_MUMPS; /* the factored matrix is symmetric, so the transpose solve coincides with the solve */
2142   F->ops->matsolve              = MatMatSolve_MUMPS;
2143   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2144 #if defined(PETSC_USE_COMPLEX)
2145   F->ops->getinertia = NULL;
2146 #else
2147   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2148 #endif
2149 
2150   mumps->matstruc = SAME_NONZERO_PATTERN;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2155 {
2156   PetscBool         iascii;
2157   PetscViewerFormat format;
2158   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2159 
2160   PetscFunctionBegin;
2161   /* check if matrix is mumps type */
2162   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2163 
2164   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2165   if (iascii) {
2166     PetscCall(PetscViewerGetFormat(viewer,&format));
2167     if (format == PETSC_VIEWER_ASCII_INFO) {
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2169       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2176       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2181       if (mumps->id.ICNTL(11)>0) {
2182         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2183         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2184         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2185         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2186         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2187         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2188       }
2189       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2190       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2191       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2192       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2193       /* ICNTL(15-17) not used */
2194       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2195       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2196       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2197       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2198       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2199       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2200 
2201       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2202       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2203       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2204       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2205       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2206       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2207 
2208       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2209       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors that may be discarded during factorization): %d\n",mumps->id.ICNTL(31)));
2210       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2211       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2212       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2213       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2214 
2215       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2216       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2217       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2218       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2219       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2220       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2221 
2222       /* information local to each processor */
2223       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2224       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2225       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2226       PetscCall(PetscViewerFlush(viewer));
2227       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2228       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2229       PetscCall(PetscViewerFlush(viewer));
2230       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2231       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2232       PetscCall(PetscViewerFlush(viewer));
2233 
2234       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2235       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2236       PetscCall(PetscViewerFlush(viewer));
2237 
2238       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2239       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2240       PetscCall(PetscViewerFlush(viewer));
2241 
2242       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2243       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2244       PetscCall(PetscViewerFlush(viewer));
2245 
2246       if (mumps->ninfo && mumps->ninfo <= 80) {
2247         PetscInt i;
2248         for (i=0; i<mumps->ninfo; i++) {
2249           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2250           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2251           PetscCall(PetscViewerFlush(viewer));
2252         }
2253       }
2254       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2255 
2256       if (!mumps->myid) { /* information from the host */
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2261 
2262         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2266         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2294         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2295         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2296       }
2297     }
2298   }
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2303 {
2304   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2305 
2306   PetscFunctionBegin;
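  /* MUMPS exposes few of these statistics directly; report the estimated number of entries in the factors (INFOG(20)) and zero out the rest */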
2307   info->block_size        = 1.0;
2308   info->nz_allocated      = mumps->id.INFOG(20);
2309   info->nz_used           = mumps->id.INFOG(20);
2310   info->nz_unneeded       = 0.0;
2311   info->assemblies        = 0.0;
2312   info->mallocs           = 0.0;
2313   info->memory            = 0.0;
2314   info->fill_ratio_given  = 0;
2315   info->fill_ratio_needed = 0;
2316   info->factor_mallocs    = 0;
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 /* -------------------------------------------------------------------------------------------*/
2321 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2322 {
2323   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2324   const PetscScalar *arr;
2325   const PetscInt    *idxs;
2326   PetscInt          size,i;
2327 
2328   PetscFunctionBegin;
2329   PetscCall(ISGetLocalSize(is,&size));
2330   /* Schur complement matrix */
2331   PetscCall(MatDestroy(&F->schur));
2332   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2333   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2334   mumps->id.schur      = (MumpsScalar*)arr;
2335   mumps->id.size_schur = size;
2336   mumps->id.schur_lld  = size;
2337   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2338   if (mumps->sym == 1) {
2339     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2340   }
2341 
2342   /* MUMPS expects Fortran style indices */
2343   PetscCall(PetscFree(mumps->id.listvar_schur));
2344   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2345   PetscCall(ISGetIndices(is,&idxs));
2346   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2347   PetscCall(ISRestoreIndices(is,&idxs));
2348   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2349   mumps->id.ICNTL(26) = -1;
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 /* -------------------------------------------------------------------------------------------*/
2354 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2355 {
2356   Mat            St;
2357   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2358   PetscScalar    *array;
2359 #if defined(PETSC_USE_COMPLEX)
2360   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2361 #endif
2362 
2363   PetscFunctionBegin;
2364   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2365   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2366   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2367   PetscCall(MatSetType(St,MATDENSE));
2368   PetscCall(MatSetUp(St));
2369   PetscCall(MatDenseGetArray(St,&array));
2370   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2371     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2372       PetscInt i,j,N=mumps->id.size_schur;
2373       for (i=0;i<N;i++) {
2374         for (j=0;j<N;j++) {
2375 #if !defined(PETSC_USE_COMPLEX)
2376           PetscScalar val = mumps->id.schur[i*N+j];
2377 #else
2378           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2379 #endif
2380           array[j*N+i] = val;
2381         }
2382       }
2383     } else { /* stored by columns */
2384       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2385     }
2386   } else { /* either full or lower-triangular (not packed) */
2387     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2388       PetscInt i,j,N=mumps->id.size_schur;
2389       for (i=0;i<N;i++) {
2390         for (j=i;j<N;j++) {
2391 #if !defined(PETSC_USE_COMPLEX)
2392           PetscScalar val = mumps->id.schur[i*N+j];
2393 #else
2394           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2395 #endif
2396           array[i*N+j] = val;
2397           array[j*N+i] = val;
2398         }
2399       }
2400     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2401       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2402     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2403       PetscInt i,j,N=mumps->id.size_schur;
2404       for (i=0;i<N;i++) {
2405         for (j=0;j<i+1;j++) {
2406 #if !defined(PETSC_USE_COMPLEX)
2407           PetscScalar val = mumps->id.schur[i*N+j];
2408 #else
2409           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2410 #endif
2411           array[i*N+j] = val;
2412           array[j*N+i] = val;
2413         }
2414       }
2415     }
2416   }
2417   PetscCall(MatDenseRestoreArray(St,&array));
2418   *S   = St;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 /* -------------------------------------------------------------------------------------------*/
2423 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2424 {
2425   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2426 
2427   PetscFunctionBegin;
2428   if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2429     PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2430     for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? */
2431     if (i == nICNTL_pre) { /* not already cached */
2432       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre));
2433       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre));
2434       mumps->ICNTL_pre[0]++;
2435     }
2436     mumps->ICNTL_pre[1+2*i] = icntl;
2437     PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i));
2438   } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2443 {
2444   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2445 
2446   PetscFunctionBegin;
2447   *ival = mumps->id.ICNTL(icntl);
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 /*@
2452   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2453 
2454    Logically Collective on Mat
2455 
2456    Input Parameters:
2457 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2458 .  icntl - index of MUMPS parameter array ICNTL()
2459 -  ival - value of MUMPS ICNTL(icntl)
2460 
2461   Options Database:
2462 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2463 
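   Example (a minimal sketch): select the METIS ordering, ICNTL(7)=5, on a MUMPS LU factor
$     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
$     MatMumpsSetIcntl(F,7,5);
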
2464    Level: beginner
2465 
2466    References:
2467 .  * - MUMPS Users' Guide
2468 
2469 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2470 @*/
2471 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2472 {
2473   PetscFunctionBegin;
2474   PetscValidType(F,1);
2475   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2476   PetscValidLogicalCollectiveInt(F,icntl,2);
2477   PetscValidLogicalCollectiveInt(F,ival,3);
2478   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2479   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 /*@
2484   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2485 
2486    Logically Collective on Mat
2487 
2488    Input Parameters:
2489 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2490 -  icntl - index of MUMPS parameter array ICNTL()
2491 
2492   Output Parameter:
2493 .  ival - value of MUMPS ICNTL(icntl)
2494 
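   Example (a sketch): query which ordering MUMPS is configured to use
$     PetscInt ival;
$     MatMumpsGetIcntl(F,7,&ival);
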
2495    Level: beginner
2496 
2497    References:
2498 .  * - MUMPS Users' Guide
2499 
2500 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2501 @*/
2502 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2503 {
2504   PetscFunctionBegin;
2505   PetscValidType(F,1);
2506   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2507   PetscValidLogicalCollectiveInt(F,icntl,2);
2508   PetscValidIntPointer(ival,3);
2509   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2510   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 /* -------------------------------------------------------------------------------------------*/
2515 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2516 {
2517   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2518 
2519   PetscFunctionBegin;
2520   if (mumps->id.job == JOB_NULL) {
2521     PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2522     for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break;
2523     if (i == nCNTL_pre) {
2524       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre));
2525       else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre));
2526       mumps->CNTL_pre[0]++;
2527     }
2528     mumps->CNTL_pre[1+2*i] = icntl;
2529     mumps->CNTL_pre[2+2*i] = val;
2530   } else mumps->id.CNTL(icntl) = val;
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2535 {
2536   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2537 
2538   PetscFunctionBegin;
2539   *val = mumps->id.CNTL(icntl);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 /*@
2544   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2545 
2546    Logically Collective on Mat
2547 
2548    Input Parameters:
2549 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2550 .  icntl - index of MUMPS parameter array CNTL()
2551 -  val - value of MUMPS CNTL(icntl)
2552 
2553   Options Database:
2554 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2555 
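   Example (a minimal sketch): set the relative pivoting threshold CNTL(1)
$     MatMumpsSetCntl(F,1,0.1);
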
2556    Level: beginner
2557 
2558    References:
2559 .  * - MUMPS Users' Guide
2560 
2561 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2562 @*/
2563 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2564 {
2565   PetscFunctionBegin;
2566   PetscValidType(F,1);
2567   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2568   PetscValidLogicalCollectiveInt(F,icntl,2);
2569   PetscValidLogicalCollectiveReal(F,val,3);
2570   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2571   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 /*@
2576   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2577 
2578    Logically Collective on Mat
2579 
2580    Input Parameters:
2581 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2582 -  icntl - index of MUMPS parameter array CNTL()
2583 
2584   Output Parameter:
2585 .  val - value of MUMPS CNTL(icntl)
2586 
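   Example (a sketch): read back the absolute pivoting threshold CNTL(3)
$     PetscReal val;
$     MatMumpsGetCntl(F,3,&val);
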
2587    Level: beginner
2588 
2589    References:
2590 .  * - MUMPS Users' Guide
2591 
2592 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2593 @*/
2594 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2595 {
2596   PetscFunctionBegin;
2597   PetscValidType(F,1);
2598   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2599   PetscValidLogicalCollectiveInt(F,icntl,2);
2600   PetscValidRealPointer(val,3);
2601   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2602   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2607 {
2608   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2609 
2610   PetscFunctionBegin;
2611   *info = mumps->id.INFO(icntl);
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2616 {
2617   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2618 
2619   PetscFunctionBegin;
2620   *infog = mumps->id.INFOG(icntl);
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2625 {
2626   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2627 
2628   PetscFunctionBegin;
2629   *rinfo = mumps->id.RINFO(icntl);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2634 {
2635   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2636 
2637   PetscFunctionBegin;
2638   *rinfog = mumps->id.RINFOG(icntl);
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2643 {
2644   Mat            Bt = NULL,Btseq = NULL;
2645   PetscBool      flg;
2646   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2647   PetscScalar    *aa;
2648   PetscInt       spnr,*ia,*ja,M,nrhs;
2649 
2650   PetscFunctionBegin;
2651   PetscValidPointer(spRHS,2);
2652   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2653   if (flg) {
2654     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2655   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2656 
2657   PetscCall(MatMumpsSetIcntl(F,30,1)); /* ICNTL(30): compute user-specified entries in inv(A) */
2658 
2659   if (mumps->petsc_size > 1) {
2660     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2661     Btseq = b->A;
2662   } else {
2663     Btseq = Bt;
2664   }
2665 
2666   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2667   mumps->id.nrhs = nrhs;
2668   mumps->id.lrhs = M;
2669   mumps->id.rhs  = NULL;
2670 
2671   if (!mumps->myid) {
2672     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2673     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2674     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2675     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2676     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2677   } else {
2678     mumps->id.irhs_ptr    = NULL;
2679     mumps->id.irhs_sparse = NULL;
2680     mumps->id.nz_rhs      = 0;
2681     mumps->id.rhs_sparse  = NULL;
2682   }
2683   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2684   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2685 
2686   /* solve phase */
2687   /*-------------*/
2688   mumps->id.job = JOB_SOLVE;
2689   PetscMUMPS_c(mumps);
2690   if (mumps->id.INFOG(1) < 0)
2691     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2692 
2693   if (!mumps->myid) {
2694     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2695     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2696     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2697   }
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 /*@
2702   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2703 
2704    Logically Collective on Mat
2705 
2706    Input Parameters:
2707 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2708 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the requested indices on MPI rank 0
2709 
2710   Output Parameter:
2711 . spRHS - requested entries of inverse of A
2712 
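   Example (a minimal sketch, assuming Bt is an AIJ matrix whose nonzero pattern selects the requested entries of inv(A)):
$     Mat spRHS;
$     MatCreateTranspose(Bt,&spRHS);
$     MatMumpsGetInverse(F,spRHS);
$     MatDestroy(&spRHS);
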
2713    Level: beginner
2714 
2715    References:
2716 .  * - MUMPS Users' Guide
2717 
2718 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2719 @*/
2720 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2721 {
2722   PetscFunctionBegin;
2723   PetscValidType(F,1);
2724   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2725   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2730 {
2731   Mat            spRHS;
2732 
2733   PetscFunctionBegin;
2734   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2735   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2736   PetscCall(MatDestroy(&spRHS));
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 /*@
2741   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2742 
2743    Logically Collective on Mat
2744 
2745    Input Parameters:
2746 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2747 -  spRHST - sequential sparse matrix in MATAIJ format holding the requested indices of A^T on MPI rank 0
2748 
2749   Output Parameter:
2750 . spRHST - requested entries of inverse of A^T
2751 
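   Example (a sketch, assuming spRHST is an AIJ matrix whose nonzero pattern selects the requested entries of inv(A^T); its values are overwritten with the result):
$     MatMumpsGetInverseTranspose(F,spRHST);
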
2752    Level: beginner
2753 
2754    References:
2755 .  * - MUMPS Users' Guide
2756 
2757 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2758 @*/
2759 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2760 {
2761   PetscBool      flg;
2762 
2763   PetscFunctionBegin;
2764   PetscValidType(F,1);
2765   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2766   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2767   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2768 
2769   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 /*@
2774   MatMumpsGetInfo - Get MUMPS parameter INFO()
2775 
2776    Logically Collective on Mat
2777 
2778    Input Parameters:
2779 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2780 -  icntl - index of MUMPS parameter array INFO()
2781 
2782   Output Parameter:
2783 .  ival - value of MUMPS INFO(icntl)
2784 
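   Example (a sketch): INFO(1) holds the local return status, negative on error
$     PetscInt ival;
$     MatMumpsGetInfo(F,1,&ival);
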
2785    Level: beginner
2786 
2787    References:
2788 .  * - MUMPS Users' Guide
2789 
2790 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2791 @*/
2792 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2793 {
2794   PetscFunctionBegin;
2795   PetscValidType(F,1);
2796   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2797   PetscValidIntPointer(ival,3);
2798   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 /*@
2803   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2804 
2805    Logically Collective on Mat
2806 
2807    Input Parameters:
2808 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2809 -  icntl - index of MUMPS parameter array INFOG()
2810 
2811   Output Parameter:
2812 .  ival - value of MUMPS INFOG(icntl)
2813 
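   Example (a sketch): INFOG(1) holds the global return status, negative on error; INFOG(2) then carries additional detail
$     PetscInt ival;
$     MatMumpsGetInfog(F,1,&ival);
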
2814    Level: beginner
2815 
2816    References:
2817 .  * - MUMPS Users' Guide
2818 
2819 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2820 @*/
2821 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2822 {
2823   PetscFunctionBegin;
2824   PetscValidType(F,1);
2825   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2826   PetscValidIntPointer(ival,3);
2827   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 /*@
2832   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2833 
2834    Logically Collective on Mat
2835 
2836    Input Parameters:
2837 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2838 -  icntl - index of MUMPS parameter array RINFO()
2839 
2840   Output Parameter:
2841 .  val - value of MUMPS RINFO(icntl)
2842 
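   Example (a sketch): RINFO(3) reports the number of floating-point operations of the elimination on the calling process after factorization
$     PetscReal val;
$     MatMumpsGetRinfo(F,3,&val);
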
2843    Level: beginner
2844 
2845    References:
2846 .  * - MUMPS Users' Guide
2847 
2848 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2849 @*/
2850 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2851 {
2852   PetscFunctionBegin;
2853   PetscValidType(F,1);
2854   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2855   PetscValidRealPointer(val,3);
2856   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 /*@
2861   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2862 
2863    Logically Collective on Mat
2864 
2865    Input Parameters:
2866 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2867 -  icntl - index of MUMPS parameter array RINFOG()
2868 
2869   Output Parameter:
2870 .  val - value of MUMPS RINFOG(icntl)
2871 
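   Example (a sketch, assuming the determinant was requested with ICNTL(33)): per the MUMPS Users' Guide the mantissa of the determinant is returned in RINFOG(12) (RINFOG(13) for the imaginary part) and the exponent in INFOG(34)
$     PetscReal mantissa;
$     MatMumpsGetRinfog(F,12,&mantissa);
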
2872    Level: beginner
2873 
2874    References:
2875 .  * - MUMPS Users' Guide
2876 
2877 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2878 @*/
2879 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2880 {
2881   PetscFunctionBegin;
2882   PetscValidType(F,1);
2883   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2884   PetscValidRealPointer(val,3);
2885   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2886   PetscFunctionReturn(0);
2887 }
2888 
2889 /*MC
2890   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2891   distributed and sequential matrices via the external package MUMPS.
2892 
2893   Works with MATAIJ and MATSBAIJ matrices; MATBAIJ and MATSEQSELL matrices are supported with LU factorization only
2894 
2895   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2896 
2897   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2898 
2899   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2900 
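  For example, a complete sparse direct solve with MUMPS LU through KSP might be run as (program name hypothetical)
$     mpiexec -n 4 ./myapp -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps
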
2901   Options Database Keys:
2902 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2903 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2904 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2905 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2906 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2907 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2908                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2909 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2910 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of steps of iterative refinement
2911 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2912 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2913 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2914 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2915 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2916 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2917 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2918 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2919 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2920 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2921 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2922 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2923 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2924 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2925 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2926 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2927 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2928 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2929 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2930 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2931 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2932 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2933 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2934 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2935 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2936 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2937 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2938                                    The default is the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
2939 
2940   Level: beginner
2941 
2942     Notes:
2943     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so attempting to use it on a Hermitian matrix will generate an error.
2944 
2945     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2946     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2947 
2948     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2949 $          KSPGetPC(ksp,&pc);
2950 $          PCFactorGetMatrix(pc,&mat);
2951 $          MatMumpsGetInfo(mat,....);
2952 $          MatMumpsGetInfog(mat,....); etc.
2953            Or run with -ksp_error_if_not_converged, in which case the program stops and the information is printed in the error message.
2954 
2955   Using MUMPS with 64-bit integers
2956     MUMPS provides 64-bit integer support in two build modes:
2957       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2958       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2959 
2960       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2961       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2962       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2963       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2964 
2965     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both the --with-64-bit-indices=0 and =1 variants of PETSc.
2966 
2967   Two modes to run MUMPS/PETSc with OpenMP
2968 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2969 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2970 
2971 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2972 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2973 
2974    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2975    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2976    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2977    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2978    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2979 
2980    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2981    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2982    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2983    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (these may have been
2984    set by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2985    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose a compute node has two sockets.
2986    If you interleave MPI ranks across the two sockets (even ranks placed on socket 0, odd ranks on socket 1) and bind
2987    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores in socket 0 and half
2988    in socket 1, which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
2989    problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2990    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2991    examine the mapping result.
2992 
2993    PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2994    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2995    calls omp_set_num_threads(m) internally before calling MUMPS.
2996 
2997    References:
2998 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2999 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3000 
3001 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3002 
3003 M*/
3004 
3005 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
3006 {
3007   PetscFunctionBegin;
3008   *type = MATSOLVERMUMPS;
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 /* MatGetFactor for Seq and MPI AIJ matrices */
3013 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
3014 {
3015   Mat            B;
3016   Mat_MUMPS      *mumps;
3017   PetscBool      isSeqAIJ;
3018   PetscMPIInt    size;
3019 
3020   PetscFunctionBegin;
3021  #if defined(PETSC_USE_COMPLEX)
3022   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3023  #endif
3024   /* Create the factorization matrix */
3025   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
3026   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3027   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3028   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3029   PetscCall(MatSetUp(B));
3030 
3031   PetscCall(PetscNewLog(B,&mumps));
3032 
3033   B->ops->view    = MatView_MUMPS;
3034   B->ops->getinfo = MatGetInfo_MUMPS;
3035 
3036   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3037   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3038   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3039   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3040   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3041   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3042   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3043   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3044   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3045   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3046   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3047   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3048   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3049 
3050   if (ftype == MAT_FACTOR_LU) {
3051     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3052     B->factortype            = MAT_FACTOR_LU;
3053     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3054     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3055     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3056     mumps->sym = 0;
3057   } else {
3058     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3059     B->factortype                  = MAT_FACTOR_CHOLESKY;
3060     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3061     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3062     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3063 #if defined(PETSC_USE_COMPLEX)
3064     mumps->sym = 2;
3065 #else
3066     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3067     else                            mumps->sym = 2;
3068 #endif
3069   }
3070 
3071   /* set solvertype */
3072   PetscCall(PetscFree(B->solvertype));
3073   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3074   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3075   if (size == 1) {
3076     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3077     B->canuseordering = PETSC_TRUE;
3078   }
3079   B->ops->destroy = MatDestroy_MUMPS;
3080   B->data         = (void*)mumps;
3081 
3082   *F = B;
3083   mumps->id.job = JOB_NULL;
3084   mumps->ICNTL_pre = NULL;
3085   mumps->CNTL_pre = NULL;
3086   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3091 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3092 {
3093   Mat            B;
3094   Mat_MUMPS      *mumps;
3095   PetscBool      isSeqSBAIJ;
3096   PetscMPIInt    size;
3097 
3098   PetscFunctionBegin;
3099  #if defined(PETSC_USE_COMPLEX)
3100   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3101  #endif
3102   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3103   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3104   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3105   PetscCall(MatSetUp(B));
3106 
3107   PetscCall(PetscNewLog(B,&mumps));
3108   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3109   if (isSeqSBAIJ) {
3110     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3111   } else {
3112     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3113   }
3114 
3115   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3116   B->ops->view                   = MatView_MUMPS;
3117   B->ops->getinfo                = MatGetInfo_MUMPS;
3118 
3119   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3120   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3121   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3122   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3123   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3124   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3125   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3126   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3132 
3133   B->factortype = MAT_FACTOR_CHOLESKY;
3134 #if defined(PETSC_USE_COMPLEX)
3135   mumps->sym = 2;
3136 #else
3137   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3138   else                            mumps->sym = 2;
3139 #endif
3140 
3141   /* set solvertype */
3142   PetscCall(PetscFree(B->solvertype));
3143   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3144   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3145   if (size == 1) {
3146     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3147     B->canuseordering = PETSC_TRUE;
3148   }
3149   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3150   B->ops->destroy = MatDestroy_MUMPS;
3151   B->data         = (void*)mumps;
3152 
3153   *F = B;
3154   mumps->id.job = JOB_NULL;
3155   mumps->ICNTL_pre = NULL;
3156   mumps->CNTL_pre = NULL;
3157   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3162 {
3163   Mat            B;
3164   Mat_MUMPS      *mumps;
3165   PetscBool      isSeqBAIJ;
3166   PetscMPIInt    size;
3167 
3168   PetscFunctionBegin;
3169   /* Create the factorization matrix */
3170   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3171   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3172   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3173   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3174   PetscCall(MatSetUp(B));
3175 
3176   PetscCall(PetscNewLog(B,&mumps));
3177   if (ftype == MAT_FACTOR_LU) {
3178     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3179     B->factortype            = MAT_FACTOR_LU;
3180     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3181     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3182     mumps->sym = 0;
3183     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3184   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3185 
3186   B->ops->view        = MatView_MUMPS;
3187   B->ops->getinfo     = MatGetInfo_MUMPS;
3188 
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3202 
3203   /* set solvertype */
3204   PetscCall(PetscFree(B->solvertype));
3205   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3206   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3207   if (size == 1) {
3208     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3209     B->canuseordering = PETSC_TRUE;
3210   }
3211   B->ops->destroy = MatDestroy_MUMPS;
3212   B->data         = (void*)mumps;
3213 
3214   *F = B;
3215   mumps->id.job = JOB_NULL;
3216   mumps->ICNTL_pre = NULL;
3217   mumps->CNTL_pre = NULL;
3218   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 /* MatGetFactor for Seq and MPI SELL matrices */
3223 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3224 {
3225   Mat            B;
3226   Mat_MUMPS      *mumps;
3227   PetscBool      isSeqSELL;
3228   PetscMPIInt    size;
3229 
3230   PetscFunctionBegin;
3231   /* Create the factorization matrix */
3232   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3233   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3234   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3235   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3236   PetscCall(MatSetUp(B));
3237 
3238   PetscCall(PetscNewLog(B,&mumps));
3239 
3240   B->ops->view        = MatView_MUMPS;
3241   B->ops->getinfo     = MatGetInfo_MUMPS;
3242 
3243   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3244   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3245   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3246   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3247   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3248   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3249   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3250   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3251   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3252   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3253   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3254 
3255   if (ftype == MAT_FACTOR_LU) {
3256     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3257     B->factortype            = MAT_FACTOR_LU;
3258     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3259     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3260     mumps->sym = 0;
3261     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3262   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3263 
3264   /* set solvertype */
3265   PetscCall(PetscFree(B->solvertype));
3266   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3267   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3268   if (size == 1) {
3269     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3270     B->canuseordering = PETSC_TRUE;
3271   }
3272   B->ops->destroy = MatDestroy_MUMPS;
3273   B->data         = (void*)mumps;
3274 
3275   *F = B;
3276   mumps->id.job = JOB_NULL;
3277   mumps->ICNTL_pre = NULL;
3278   mumps->CNTL_pre = NULL;
3279   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3280   PetscFunctionReturn(0);
3281 }
3282 
3283 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3284 {
3285   PetscFunctionBegin;
3286   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3287   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3288   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3289   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3290   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3291   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3292   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3293   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3294   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3295   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3296   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3297   PetscFunctionReturn(0);
3298 }
3299