xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 5cb80ecd61fc93edbe681011dc160a66bbda2b5d)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_NULL 0
27 #define JOB_FACTSYMBOLIC 1
28 #define JOB_FACTNUMERIC 2
29 #define JOB_SOLVE 3
30 #define JOB_END -2
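/* A MUMPS instance is driven by setting id.job to one of the values above before calling MUMPS:
   JOB_INIT once, then JOB_FACTSYMBOLIC, JOB_FACTNUMERIC and JOB_SOLVE as needed, and finally JOB_END. */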
31 
32 /* calls to MUMPS */
33 #if defined(PETSC_USE_COMPLEX)
34 #if defined(PETSC_USE_REAL_SINGLE)
35 #define MUMPS_c cmumps_c
36 #else
37 #define MUMPS_c zmumps_c
38 #endif
39 #else
40 #if defined(PETSC_USE_REAL_SINGLE)
41 #define MUMPS_c smumps_c
42 #else
43 #define MUMPS_c dmumps_c
44 #endif
45 #endif
46 
47 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
48    numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
49    naming convention of PetscMPIInt, PetscBLASInt, etc.
50 */
51 typedef MUMPS_INT PetscMUMPSInt;
52 
53 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
54   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore this macro is the more reliable check */
55     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
56   #endif
57 #else
58   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
59     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
60   #endif
61 #endif
62 
63 #define MPIU_MUMPSINT             MPI_INT
64 #define PETSC_MUMPS_INT_MAX       2147483647
65 #define PETSC_MUMPS_INT_MIN       -2147483648
66 
67 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
68 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
69 {
70   PetscFunctionBegin;
71 #if PetscDefined(USE_64BIT_INDICES)
72   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
73 #endif
74   *b = (PetscMUMPSInt)(a);
75   PetscFunctionReturn(0);
76 }
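/* For example, with 64-bit PetscInt, PetscMUMPSIntCast((PetscInt)3000000000,&b) errors out with
   PETSC_ERR_ARG_OUTOFRANGE, while any value within [PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX] is cast as is. */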
77 
78 /* Put these utility routines here since they are only used in this file */
79 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
80 {
81   PetscInt       myval;
82   PetscBool      myset;
83   PetscFunctionBegin;
84   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
85   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
86   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
87   if (set) *set = myset;
88   PetscFunctionReturn(0);
89 }
90 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
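/* A hedged usage sketch (the option name and help text here are illustrative, not necessarily those used below):
     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing","None",mumps->id.ICNTL(4),&icntl,&flg));
     if (flg) mumps->id.ICNTL(4) = icntl;
   The macro supplies PetscOptionsObject and the PetscMUMPSInt range bounds; it must be used between PetscOptionsBegin/End. */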
91 
92 /* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94 #define PetscMUMPS_c(mumps) \
95   do { \
96     if (mumps->use_petsc_omp_support) { \
97       if (mumps->is_omp_master) { \
98         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
99         MUMPS_c(&mumps->id); \
100         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
101       } \
102       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
103       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
104          to each process, so we only Bcast info[1] (an error code) and leave the others alone (they do not \
105          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.     \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
107       */ \
108       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
110       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
111     } else { \
112       MUMPS_c(&mumps->id); \
113     } \
114   } while (0)
115 #else
116 #define PetscMUMPS_c(mumps) \
117   do { MUMPS_c(&mumps->id); } while (0)
118 #endif
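/* Typical call pattern (as used in the factorization and solve phases below):
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
*/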
119 
120 /* define MumpsScalar */
121 #if defined(PETSC_USE_COMPLEX)
122 #if defined(PETSC_USE_REAL_SINGLE)
123 #define MumpsScalar mumps_complex
124 #else
125 #define MumpsScalar mumps_double_complex
126 #endif
127 #else
128 #define MumpsScalar PetscScalar
129 #endif
130 
131 /* macros so that indices match the MUMPS documentation */
132 #define ICNTL(I) icntl[(I)-1]
133 #define CNTL(I) cntl[(I)-1]
134 #define INFOG(I) infog[(I)-1]
135 #define INFO(I) info[(I)-1]
136 #define RINFOG(I) rinfog[(I)-1]
137 #define RINFO(I) rinfo[(I)-1]
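/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6]: the 1-based numbering of the MUMPS
   documentation is kept in the source while the underlying C arrays stay 0-based. */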
138 
139 typedef struct Mat_MUMPS Mat_MUMPS;
140 struct Mat_MUMPS {
141 #if defined(PETSC_USE_COMPLEX)
142 #if defined(PETSC_USE_REAL_SINGLE)
143   CMUMPS_STRUC_C id;
144 #else
145   ZMUMPS_STRUC_C id;
146 #endif
147 #else
148 #if defined(PETSC_USE_REAL_SINGLE)
149   SMUMPS_STRUC_C id;
150 #else
151   DMUMPS_STRUC_C id;
152 #endif
153 #endif
154 
155   MatStructure   matstruc;
156   PetscMPIInt    myid,petsc_size;
157   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
158   PetscScalar    *val,*val_alloc;       /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
159   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
160   PetscMUMPSInt  sym;
161   MPI_Comm       mumps_comm;
162   PetscMUMPSInt  *ICNTL_pre;
163   PetscReal      *CNTL_pre;
164   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
165   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
166   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
167   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
168 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
169   PetscInt       *rhs_nrow,max_nrhs;
170   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
171   PetscScalar    *rhs_loc,*rhs_recvbuf;
172 #endif
173   Vec            b_seq,x_seq;
174   PetscInt       ninfo,*info;           /* which INFO to display */
175   PetscInt       sizeredrhs;
176   PetscScalar    *schur_sol;
177   PetscInt       schur_sizesol;
178   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
179   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
180   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
181 
182   /* stuff used by petsc/mumps OpenMP support*/
183   PetscBool      use_petsc_omp_support;
184   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
185   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
186   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
187   PetscMPIInt    tag,omp_comm_size;
188   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
189   MPI_Request    *reqs;
190 };
191 
192 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
193    Here, nrow is the number of rows, ia[] is the row pointer and ja[] contains the column indices.
194  */
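/* For example, the 2x2 upper triangular matrix [2 3; 0 5] stored as a 1-based CSR has nrow=2,
   ia = {1,3,4}, ja = {1,2,2}, so nnz = ia[nrow]-1 = 3. */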
195 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
196 {
197   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
198 
199   PetscFunctionBegin;
200 #if defined(PETSC_USE_64BIT_INDICES)
201   {
202     PetscInt i;
203     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
204       PetscCall(PetscFree(mumps->ia_alloc));
205       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
206       mumps->cur_ilen = nrow+1;
207     }
208     if (nnz > mumps->cur_jlen) {
209       PetscCall(PetscFree(mumps->ja_alloc));
210       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
211       mumps->cur_jlen = nnz;
212     }
213     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
214     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
215     *ia_mumps = mumps->ia_alloc;
216     *ja_mumps = mumps->ja_alloc;
217   }
218 #else
219   *ia_mumps = ia;
220   *ja_mumps = ja;
221 #endif
222   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
223   PetscFunctionReturn(0);
224 }
225 
226 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
227 {
228   PetscFunctionBegin;
229   PetscCall(PetscFree(mumps->id.listvar_schur));
230   PetscCall(PetscFree(mumps->id.redrhs));
231   PetscCall(PetscFree(mumps->schur_sol));
232   mumps->id.size_schur = 0;
233   mumps->id.schur_lld  = 0;
234   mumps->id.ICNTL(19)  = 0;
235   PetscFunctionReturn(0);
236 }
237 
238 /* solve with rhs in mumps->id.redrhs and return in the same location */
239 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
240 {
241   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
242   Mat                  S,B,X;
243   MatFactorSchurStatus schurstatus;
244   PetscInt             sizesol;
245 
246   PetscFunctionBegin;
247   PetscCall(MatFactorFactorizeSchurComplement(F));
248   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
249   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
250   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252   PetscCall(MatBindToCPU(B,S->boundtocpu));
253 #endif
254   switch (schurstatus) {
255   case MAT_FACTOR_SCHUR_FACTORED:
256     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
257     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259     PetscCall(MatBindToCPU(X,S->boundtocpu));
260 #endif
261     if (!mumps->id.ICNTL(9)) { /* transpose solve */
262       PetscCall(MatMatSolveTranspose(S,B,X));
263     } else {
264       PetscCall(MatMatSolve(S,B,X));
265     }
266     break;
267   case MAT_FACTOR_SCHUR_INVERTED:
268     sizesol = mumps->id.nrhs*mumps->id.size_schur;
269     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270       PetscCall(PetscFree(mumps->schur_sol));
271       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
272       mumps->schur_sizesol = sizesol;
273     }
274     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
275     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277     PetscCall(MatBindToCPU(X,S->boundtocpu));
278 #endif
279     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
280     if (!mumps->id.ICNTL(9)) { /* transpose solve */
281       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
282     } else {
283       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
284     }
285     PetscCall(MatProductSetFromOptions(X));
286     PetscCall(MatProductSymbolic(X));
287     PetscCall(MatProductNumeric(X));
288 
289     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
290     break;
291   default:
292     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
293   }
294   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
295   PetscCall(MatDestroy(&B));
296   PetscCall(MatDestroy(&X));
297   PetscFunctionReturn(0);
298 }
299 
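/* MatMumpsHandleSchur_Private - drive the two MUMPS solve phases around a Schur complement.
   With expansion == PETSC_FALSE it allocates id.redrhs and sets ICNTL(26)=1 so that the next solve condenses
   the right-hand side onto the Schur variables; with expansion == PETSC_TRUE it solves the Schur system via
   MatMumpsSolveSchur_Private(), reruns MUMPS with ICNTL(26)=2 to expand the solution back to the full system,
   and then restores ICNTL(26)=-1. */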
300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
301 {
302   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
303 
304   PetscFunctionBegin;
305   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
306     PetscFunctionReturn(0);
307   }
308   if (!expansion) { /* prepare for the condensation step */
309     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
310     /* allocate MUMPS internal array to store reduced right-hand sides */
311     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
312       PetscCall(PetscFree(mumps->id.redrhs));
313       mumps->id.lredrhs = mumps->id.size_schur;
314       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
315       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
316     }
317     mumps->id.ICNTL(26) = 1; /* condensation phase */
318   } else { /* prepare for the expansion step */
319     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
320     PetscCall(MatMumpsSolveSchur_Private(F));
321     mumps->id.ICNTL(26) = 2; /* expansion phase */
322     PetscMUMPS_c(mumps);
323     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
324     /* restore defaults */
325     mumps->id.ICNTL(26) = -1;
326     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
327     if (mumps->id.nrhs > 1) {
328       PetscCall(PetscFree(mumps->id.redrhs));
329       mumps->id.lredrhs = 0;
330       mumps->sizeredrhs = 0;
331     }
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 /*
337   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
338 
339   input:
340     A       - matrix in aij, baij or sbaij format
341     shift   - 0: C style (0-based) output triples; 1: Fortran style (1-based) output triples.
342     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
343               MAT_REUSE_MATRIX:   only the values in the v array are updated
344   output:
345     nnz     - length of r, c, and v (number of local nonzero entries of A)
346     r, c, v - row indices, column indices, matrix values (the matrix triples)
347 
348   The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc() and are later freed in MatDestroy_MUMPS()
349   with PetscFree(mumps->irn). This is not ideal: because v is only sometimes part of mumps->irn, the PetscMalloc()
350   cannot easily be replaced with a PetscMalloc3().
351 
352  */
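/* For example, with shift=1 (Fortran style), the 2x2 matrix [2 3; 0 5] in AIJ format is converted to the
   triples r = {1,1,2}, c = {1,2,2}, v = {2.0,3.0,5.0} with nnz = 3. */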
353 
354 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
355 {
356   const PetscScalar *av;
357   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
358   PetscInt64        nz,rnz,i,j,k;
359   PetscMUMPSInt     *row,*col;
360   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatSeqAIJGetArrayRead(A,&av));
364   mumps->val = (PetscScalar*)av;
365   if (reuse == MAT_INITIAL_MATRIX) {
366     nz   = aa->nz;
367     ai   = aa->i;
368     aj   = aa->j;
369     PetscCall(PetscMalloc2(nz,&row,nz,&col));
370     for (i=k=0; i<M; i++) {
371       rnz = ai[i+1] - ai[i];
372       ajj = aj + ai[i];
373       for (j=0; j<rnz; j++) {
374         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
375         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
376         k++;
377       }
378     }
379     mumps->irn = row;
380     mumps->jcn = col;
381     mumps->nnz = nz;
382   }
383   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
384   PetscFunctionReturn(0);
385 }
386 
387 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
388 {
389   PetscInt64     nz,i,j,k,r;
390   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
391   PetscMUMPSInt  *row,*col;
392 
393   PetscFunctionBegin;
394   mumps->val = a->val;
395   if (reuse == MAT_INITIAL_MATRIX) {
396     nz   = a->sliidx[a->totalslices];
397     PetscCall(PetscMalloc2(nz,&row,nz,&col));
398     for (i=k=0; i<a->totalslices; i++) {
399       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
400         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
401       }
402     }
403     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
404     mumps->irn = row;
405     mumps->jcn = col;
406     mumps->nnz = nz;
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
412 {
413   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
414   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
415   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
416   PetscInt       bs;
417   PetscMUMPSInt  *row,*col;
418 
419   PetscFunctionBegin;
420   PetscCall(MatGetBlockSize(A,&bs));
421   M          = A->rmap->N/bs;
422   mumps->val = aa->a;
423   if (reuse == MAT_INITIAL_MATRIX) {
424     ai   = aa->i; aj = aa->j;
425     nz   = bs2*aa->nz;
426     PetscCall(PetscMalloc2(nz,&row,nz,&col));
427     for (i=0; i<M; i++) {
428       ajj = aj + ai[i];
429       rnz = ai[i+1] - ai[i];
430       for (k=0; k<rnz; k++) {
431         for (j=0; j<bs; j++) {
432           for (m=0; m<bs; m++) {
433             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
434             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
435             idx++;
436           }
437         }
438       }
439     }
440     mumps->irn = row;
441     mumps->jcn = col;
442     mumps->nnz = nz;
443   }
444   PetscFunctionReturn(0);
445 }
446 
447 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
448 {
449   const PetscInt *ai, *aj,*ajj;
450   PetscInt        bs;
451   PetscInt64      nz,rnz,i,j,k,m;
452   PetscMUMPSInt   *row,*col;
453   PetscScalar     *val;
454   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
455   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
456 #if defined(PETSC_USE_COMPLEX)
457   PetscBool       isset,hermitian;
458 #endif
459 
460   PetscFunctionBegin;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
463   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
464 #endif
465   ai   = aa->i;
466   aj   = aa->j;
467   PetscCall(MatGetBlockSize(A,&bs));
468   if (reuse == MAT_INITIAL_MATRIX) {
469     nz   = aa->nz;
470     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
471     if (bs>1) {
472       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
473       mumps->val = mumps->val_alloc;
474     } else {
475       mumps->val = aa->a;
476     }
477     mumps->irn = row;
478     mumps->jcn = col;
479   } else {
480     if (bs == 1) mumps->val = aa->a;
481     row = mumps->irn;
482     col = mumps->jcn;
483   }
484   val = mumps->val;
485 
486   nz = 0;
487   if (bs>1) {
488     for (i=0; i<mbs; i++) {
489       rnz = ai[i+1] - ai[i];
490       ajj = aj + ai[i];
491       for (j=0; j<rnz; j++) {
492         for (k=0; k<bs; k++) {
493           for (m=0; m<bs; m++) {
494             if (ajj[j]>i || k>=m) {
495               if (reuse == MAT_INITIAL_MATRIX) {
496                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
497                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
498               }
499               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
500             }
501           }
502         }
503       }
504     }
505   } else if (reuse == MAT_INITIAL_MATRIX) {
506     for (i=0; i<mbs; i++) {
507       rnz = ai[i+1] - ai[i];
508       ajj = aj + ai[i];
509       for (j=0; j<rnz; j++) {
510         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
511         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
512         nz++;
513       }
514     }
515     PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
516   }
517   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
518   PetscFunctionReturn(0);
519 }
520 
521 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
522 {
523   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
524   PetscInt64        nz,rnz,i,j;
525   const PetscScalar *av,*v1;
526   PetscScalar       *val;
527   PetscMUMPSInt     *row,*col;
528   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
529   PetscBool         missing;
530 #if defined(PETSC_USE_COMPLEX)
531   PetscBool         hermitian,isset;
532 #endif
533 
534   PetscFunctionBegin;
535 #if defined(PETSC_USE_COMPLEX)
536   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
537   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
538 #endif
539   PetscCall(MatSeqAIJGetArrayRead(A,&av));
540   ai    = aa->i; aj = aa->j;
541   adiag = aa->diag;
542   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
543   if (reuse == MAT_INITIAL_MATRIX) {
544     /* count nz in the upper triangular part of A */
545     nz = 0;
546     if (missing) {
547       for (i=0; i<M; i++) {
548         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
549           for (j=ai[i];j<ai[i+1];j++) {
550             if (aj[j] < i) continue;
551             nz++;
552           }
553         } else {
554           nz += ai[i+1] - adiag[i];
555         }
556       }
557     } else {
558       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
559     }
560     PetscCall(PetscMalloc2(nz,&row,nz,&col));
561     PetscCall(PetscMalloc1(nz,&val));
562     mumps->nnz = nz;
563     mumps->irn = row;
564     mumps->jcn = col;
565     mumps->val = mumps->val_alloc = val;
566 
567     nz = 0;
568     if (missing) {
569       for (i=0; i<M; i++) {
570         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
571           for (j=ai[i];j<ai[i+1];j++) {
572             if (aj[j] < i) continue;
573             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
574             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
575             val[nz] = av[j];
576             nz++;
577           }
578         } else {
579           rnz = ai[i+1] - adiag[i];
580           ajj = aj + adiag[i];
581           v1  = av + adiag[i];
582           for (j=0; j<rnz; j++) {
583             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
584             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
585             val[nz++] = v1[j];
586           }
587         }
588       }
589     } else {
590       for (i=0; i<M; i++) {
591         rnz = ai[i+1] - adiag[i];
592         ajj = aj + adiag[i];
593         v1  = av + adiag[i];
594         for (j=0; j<rnz; j++) {
595           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
596           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
597           val[nz++] = v1[j];
598         }
599       }
600     }
601   } else {
602     nz = 0;
603     val = mumps->val;
604     if (missing) {
605       for (i=0; i <M; i++) {
606         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
607           for (j=ai[i];j<ai[i+1];j++) {
608             if (aj[j] < i) continue;
609             val[nz++] = av[j];
610           }
611         } else {
612           rnz = ai[i+1] - adiag[i];
613           v1  = av + adiag[i];
614           for (j=0; j<rnz; j++) {
615             val[nz++] = v1[j];
616           }
617         }
618       }
619     } else {
620       for (i=0; i <M; i++) {
621         rnz = ai[i+1] - adiag[i];
622         v1  = av + adiag[i];
623         for (j=0; j<rnz; j++) {
624           val[nz++] = v1[j];
625         }
626       }
627     }
628   }
629   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
634 {
635   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
636   PetscInt          bs;
637   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
638   PetscMUMPSInt     *row,*col;
639   const PetscScalar *av,*bv,*v1,*v2;
640   PetscScalar       *val;
641   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
642   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
643   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
644   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
645 #if defined(PETSC_USE_COMPLEX)
646   PetscBool         hermitian,isset;
647 #endif
648 
649   PetscFunctionBegin;
650 #if defined(PETSC_USE_COMPLEX)
651   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
652   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
653 #endif
654   PetscCall(MatGetBlockSize(A,&bs));
655   rstart = A->rmap->rstart;
656   ai = aa->i;
657   aj = aa->j;
658   bi = bb->i;
659   bj = bb->j;
660   av = aa->a;
661   bv = bb->a;
662 
663   garray = mat->garray;
664 
665   if (reuse == MAT_INITIAL_MATRIX) {
666     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
667     PetscCall(PetscMalloc2(nz,&row,nz,&col));
668     PetscCall(PetscMalloc1(nz,&val));
669     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
670     mumps->irn = row;
671     mumps->jcn = col;
672     mumps->val = mumps->val_alloc = val;
673   } else {
674     val = mumps->val;
675   }
676 
677   jj = 0; irow = rstart;
678   for (i=0; i<mbs; i++) {
679     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
680     countA = ai[i+1] - ai[i];
681     countB = bi[i+1] - bi[i];
682     bjj    = bj + bi[i];
683     v1     = av + ai[i]*bs2;
684     v2     = bv + bi[i]*bs2;
685 
686     if (bs>1) {
687       /* A-part */
688       for (j=0; j<countA; j++) {
689         for (k=0; k<bs; k++) {
690           for (m=0; m<bs; m++) {
691             if (rstart + ajj[j]*bs>irow || k>=m) {
692               if (reuse == MAT_INITIAL_MATRIX) {
693                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
694                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
695               }
696               val[jj++] = v1[j*bs2 + m + k*bs];
697             }
698           }
699         }
700       }
701 
702       /* B-part */
703       for (j=0; j < countB; j++) {
704         for (k=0; k<bs; k++) {
705           for (m=0; m<bs; m++) {
706             if (reuse == MAT_INITIAL_MATRIX) {
707               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
708               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
709             }
710             val[jj++] = v2[j*bs2 + m + k*bs];
711           }
712         }
713       }
714     } else {
715       /* A-part */
716       for (j=0; j<countA; j++) {
717         if (reuse == MAT_INITIAL_MATRIX) {
718           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
719           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
720         }
721         val[jj++] = v1[j];
722       }
723 
724       /* B-part */
725       for (j=0; j < countB; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
728           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
729         }
730         val[jj++] = v2[j];
731       }
732     }
733     irow+=bs;
734   }
735   mumps->nnz = jj;
736   PetscFunctionReturn(0);
737 }
738 
739 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
740 {
741   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
742   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
743   PetscMUMPSInt     *row,*col;
744   const PetscScalar *av, *bv,*v1,*v2;
745   PetscScalar       *val;
746   Mat               Ad,Ao;
747   Mat_SeqAIJ        *aa;
748   Mat_SeqAIJ        *bb;
749 
750   PetscFunctionBegin;
751   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
752   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
753   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
754 
755   aa = (Mat_SeqAIJ*)(Ad)->data;
756   bb = (Mat_SeqAIJ*)(Ao)->data;
757   ai = aa->i;
758   aj = aa->j;
759   bi = bb->i;
760   bj = bb->j;
761 
762   rstart = A->rmap->rstart;
763 
764   if (reuse == MAT_INITIAL_MATRIX) {
765     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
766     PetscCall(PetscMalloc2(nz,&row,nz,&col));
767     PetscCall(PetscMalloc1(nz,&val));
768     mumps->nnz = nz;
769     mumps->irn = row;
770     mumps->jcn = col;
771     mumps->val = mumps->val_alloc = val;
772   } else {
773     val = mumps->val;
774   }
775 
776   jj = 0; irow = rstart;
777   for (i=0; i<m; i++) {
778     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
779     countA = ai[i+1] - ai[i];
780     countB = bi[i+1] - bi[i];
781     bjj    = bj + bi[i];
782     v1     = av + ai[i];
783     v2     = bv + bi[i];
784 
785     /* A-part */
786     for (j=0; j<countA; j++) {
787       if (reuse == MAT_INITIAL_MATRIX) {
788         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
789         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
790       }
791       val[jj++] = v1[j];
792     }
793 
794     /* B-part */
795     for (j=0; j < countB; j++) {
796       if (reuse == MAT_INITIAL_MATRIX) {
797         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
798         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
799       }
800       val[jj++] = v2[j];
801     }
802     irow++;
803   }
804   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
805   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
806   PetscFunctionReturn(0);
807 }
808 
809 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
810 {
811   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
812   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
813   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
814   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
815   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
816   const PetscInt    bs2=mat->bs2;
817   PetscInt          bs;
818   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
819   PetscMUMPSInt     *row,*col;
820   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
821   PetscScalar       *val;
822 
823   PetscFunctionBegin;
824   PetscCall(MatGetBlockSize(A,&bs));
825   if (reuse == MAT_INITIAL_MATRIX) {
826     nz   = bs2*(aa->nz + bb->nz);
827     PetscCall(PetscMalloc2(nz,&row,nz,&col));
828     PetscCall(PetscMalloc1(nz,&val));
829     mumps->nnz = nz;
830     mumps->irn = row;
831     mumps->jcn = col;
832     mumps->val = mumps->val_alloc = val;
833   } else {
834     val = mumps->val;
835   }
836 
837   jj = 0; irow = rstart;
838   for (i=0; i<mbs; i++) {
839     countA = ai[i+1] - ai[i];
840     countB = bi[i+1] - bi[i];
841     ajj    = aj + ai[i];
842     bjj    = bj + bi[i];
843     v1     = av + bs2*ai[i];
844     v2     = bv + bs2*bi[i];
845 
846     idx = 0;
847     /* A-part */
848     for (k=0; k<countA; k++) {
849       for (j=0; j<bs; j++) {
850         for (n=0; n<bs; n++) {
851           if (reuse == MAT_INITIAL_MATRIX) {
852             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
853             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
854           }
855           val[jj++] = v1[idx++];
856         }
857       }
858     }
859 
860     idx = 0;
861     /* B-part */
862     for (k=0; k<countB; k++) {
863       for (j=0; j<bs; j++) {
864         for (n=0; n<bs; n++) {
865           if (reuse == MAT_INITIAL_MATRIX) {
866             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
867             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
868           }
869           val[jj++] = v2[idx++];
870         }
871       }
872     }
873     irow += bs;
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
879 {
880   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
881   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
882   PetscMUMPSInt     *row,*col;
883   const PetscScalar *av, *bv,*v1,*v2;
884   PetscScalar       *val;
885   Mat               Ad,Ao;
886   Mat_SeqAIJ        *aa;
887   Mat_SeqAIJ        *bb;
888 #if defined(PETSC_USE_COMPLEX)
889   PetscBool         hermitian,isset;
890 #endif
891 
892   PetscFunctionBegin;
893 #if defined(PETSC_USE_COMPLEX)
894   PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
895   PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
896 #endif
897   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
898   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
899   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
900 
901   aa    = (Mat_SeqAIJ*)(Ad)->data;
902   bb    = (Mat_SeqAIJ*)(Ao)->data;
903   ai    = aa->i;
904   aj    = aa->j;
905   adiag = aa->diag;
906   bi    = bb->i;
907   bj    = bb->j;
908 
909   rstart = A->rmap->rstart;
910 
911   if (reuse == MAT_INITIAL_MATRIX) {
912     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
913     nzb = 0;    /* num of upper triangular entries in mat->B */
914     for (i=0; i<m; i++) {
915       nza   += (ai[i+1] - adiag[i]);
916       countB = bi[i+1] - bi[i];
917       bjj    = bj + bi[i];
918       for (j=0; j<countB; j++) {
919         if (garray[bjj[j]] > rstart) nzb++;
920       }
921     }
922 
923     nz   = nza + nzb; /* total nz of upper triangular part of mat */
924     PetscCall(PetscMalloc2(nz,&row,nz,&col));
925     PetscCall(PetscMalloc1(nz,&val));
926     mumps->nnz = nz;
927     mumps->irn = row;
928     mumps->jcn = col;
929     mumps->val = mumps->val_alloc = val;
930   } else {
931     val = mumps->val;
932   }
933 
934   jj = 0; irow = rstart;
935   for (i=0; i<m; i++) {
936     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
937     v1     = av + adiag[i];
938     countA = ai[i+1] - adiag[i];
939     countB = bi[i+1] - bi[i];
940     bjj    = bj + bi[i];
941     v2     = bv + bi[i];
942 
943     /* A-part */
944     for (j=0; j<countA; j++) {
945       if (reuse == MAT_INITIAL_MATRIX) {
946         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
947         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
948       }
949       val[jj++] = v1[j];
950     }
951 
952     /* B-part */
953     for (j=0; j < countB; j++) {
954       if (garray[bjj[j]] > rstart) {
955         if (reuse == MAT_INITIAL_MATRIX) {
956           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
957           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
958         }
959         val[jj++] = v2[j];
960       }
961     }
962     irow++;
963   }
964   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
965   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
966   PetscFunctionReturn(0);
967 }
968 
969 PetscErrorCode MatDestroy_MUMPS(Mat A)
970 {
971   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
972 
973   PetscFunctionBegin;
974   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
975   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
976   PetscCall(VecScatterDestroy(&mumps->scat_sol));
977   PetscCall(VecDestroy(&mumps->b_seq));
978   PetscCall(VecDestroy(&mumps->x_seq));
979   PetscCall(PetscFree(mumps->id.perm_in));
980   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
981   PetscCall(PetscFree(mumps->val_alloc));
982   PetscCall(PetscFree(mumps->info));
983   PetscCall(PetscFree(mumps->ICNTL_pre));
984   PetscCall(PetscFree(mumps->CNTL_pre));
985   PetscCall(MatMumpsResetSchur_Private(mumps));
986   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
987     mumps->id.job = JOB_END;
988     PetscMUMPS_c(mumps);
989     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
990     if (mumps->mumps_comm != MPI_COMM_NULL) {
991       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
992       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
993     }
994   }
995 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
996   if (mumps->use_petsc_omp_support) {
997     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
998     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
999     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
1000   }
1001 #endif
1002   PetscCall(PetscFree(mumps->ia_alloc));
1003   PetscCall(PetscFree(mumps->ja_alloc));
1004   PetscCall(PetscFree(mumps->recvcount));
1005   PetscCall(PetscFree(mumps->reqs));
1006   PetscCall(PetscFree(mumps->irhs_loc));
1007   PetscCall(PetscFree(A->data));
1008 
1009   /* clear composed functions */
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1013   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1014   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1015   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1016   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1020   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1021   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1022   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS; <array> points to the start of the RHS on the local process. */
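/* For example, without the OpenMP-support path, if two ranks own rows [0,3) and [3,5) of A, then rank 0 gets
   irhs_loc = {1,2,3} and rank 1 gets irhs_loc = {4,5} (MUMPS expects 1-based indices). */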
1027 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1028 {
1029   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1030   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1031   PetscInt           i,m,M,rstart;
1032 
1033   PetscFunctionBegin;
1034   PetscCall(MatGetSize(A,&M,NULL));
1035   PetscCall(MatGetLocalSize(A,&m,NULL));
1036   PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1037   if (ompsize == 1) {
1038     if (!mumps->irhs_loc) {
1039       mumps->nloc_rhs = m;
1040       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1041       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1042       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1043     }
1044     mumps->id.rhs_loc = (MumpsScalar*)array;
1045   } else {
1046   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1047     const PetscInt  *ranges;
1048     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1049     MPI_Group       petsc_group,omp_group;
1050     PetscScalar     *recvbuf=NULL;
1051 
1052     if (mumps->is_omp_master) {
1053       /* Lazily initialize the omp stuff for distributed rhs */
1054       if (!mumps->irhs_loc) {
1055         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1056         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1057         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1058         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1059         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1060         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1061 
1062         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1063         mumps->nloc_rhs = 0;
1064         PetscCall(MatGetOwnershipRanges(A,&ranges));
1065         for (j=0; j<ompsize; j++) {
1066           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1067           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1068         }
1069         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1070         for (j=k=0; j<ompsize; j++) {
1071           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1072         }
1073 
1074         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1075         PetscCallMPI(MPI_Group_free(&petsc_group));
1076         PetscCallMPI(MPI_Group_free(&omp_group));
1077       }
1078 
1079       /* Realloc buffers when current nrhs is bigger than what we have met */
1080       if (nrhs > mumps->max_nrhs) {
1081         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1082         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1083         mumps->max_nrhs = nrhs;
1084       }
1085 
1086       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1087       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1088       mumps->rhs_disps[0] = 0;
1089       for (j=1; j<ompsize; j++) {
1090         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1091         PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1092       }
1093       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1094     }
1095 
1096     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1097     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1098 
1099     if (mumps->is_omp_master) {
1100       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1101         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1102         for (j=0; j<ompsize; j++) {
1103           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1104           dst = dstbase;
1105           for (i=0; i<nrhs; i++) {
1106             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1107             src += mumps->rhs_nrow[j];
1108             dst += mumps->nloc_rhs;
1109           }
1110           dstbase += mumps->rhs_nrow[j];
1111         }
1112       }
1113       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1114     }
1115   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1116   }
1117   mumps->id.nrhs     = nrhs;
1118   mumps->id.nloc_rhs = mumps->nloc_rhs;
1119   mumps->id.lrhs_loc = mumps->nloc_rhs;
1120   mumps->id.irhs_loc = mumps->irhs_loc;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1125 {
1126   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1127   const PetscScalar  *rarray = NULL;
1128   PetscScalar        *array;
1129   IS                 is_iden,is_petsc;
1130   PetscInt           i;
1131   PetscBool          second_solve = PETSC_FALSE;
1132   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1133 
1134   PetscFunctionBegin;
1135   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1136   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1137 
1138   if (A->factorerrortype) {
1139     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1140     PetscCall(VecSetInf(x));
1141     PetscFunctionReturn(0);
1142   }
1143 
1144   mumps->id.nrhs = 1;
1145   if (mumps->petsc_size > 1) {
1146     if (mumps->ICNTL20 == 10) {
1147       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1148       PetscCall(VecGetArrayRead(b,&rarray));
1149       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1150     } else {
1151       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1152       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1153       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1154       if (!mumps->myid) {
1155         PetscCall(VecGetArray(mumps->b_seq,&array));
1156         mumps->id.rhs = (MumpsScalar*)array;
1157       }
1158     }
1159   } else {  /* petsc_size == 1 */
1160     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1161     PetscCall(VecCopy(b,x));
1162     PetscCall(VecGetArray(x,&array));
1163     mumps->id.rhs = (MumpsScalar*)array;
1164   }
1165 
1166   /*
1167      handle condensation step of Schur complement (if any)
1168      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1169      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1170      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1171      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1172   */
1173   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1174     PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1175     second_solve = PETSC_TRUE;
1176     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1177   }
1178   /* solve phase */
1179   /*-------------*/
1180   mumps->id.job = JOB_SOLVE;
1181   PetscMUMPS_c(mumps);
1182   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1183 
1184   /* handle expansion step of Schur complement (if any) */
1185   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1186 
1187   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1188     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1189       /* when id.ICNTL(9) changes, the contents of isol_loc[] may change (though not its length lsol_loc), so recreate scat_sol */
1190       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1191     }
1192     if (!mumps->scat_sol) { /* create scatter scat_sol */
1193       PetscInt *isol2_loc=NULL;
1194       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1195       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1196       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1197       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1198       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1199       PetscCall(ISDestroy(&is_iden));
1200       PetscCall(ISDestroy(&is_petsc));
1201       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1202     }
1203 
1204     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1205     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1206   }
1207 
1208   if (mumps->petsc_size > 1) {
1209     if (mumps->ICNTL20 == 10) {
1210       PetscCall(VecRestoreArrayRead(b,&rarray));
1211     } else if (!mumps->myid) {
1212       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1213     }
1214   } else PetscCall(VecRestoreArray(x,&array));
1215 
1216   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1221 {
1222   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1223 
1224   PetscFunctionBegin;
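  /* ICNTL(9)=1 (the default) solves A x = b; ICNTL(9)=0 solves A^T x = b, so flip it temporarily for the transpose solve */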
1225   mumps->id.ICNTL(9) = 0;
1226   PetscCall(MatSolve_MUMPS(A,b,x));
1227   mumps->id.ICNTL(9) = 1;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1232 {
1233   Mat               Bt = NULL;
1234   PetscBool         denseX,denseB,flg,flgT;
1235   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1236   PetscInt          i,nrhs,M;
1237   PetscScalar       *array;
1238   const PetscScalar *rbray;
1239   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1240   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1241   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1242   IS                is_to,is_from;
1243   PetscInt          k,proc,j,m,myrstart;
1244   const PetscInt    *rstart;
1245   Vec               v_mpi,msol_loc;
1246   VecScatter        scat_sol;
1247   Vec               b_seq;
1248   VecScatter        scat_rhs;
1249   PetscScalar       *aa;
1250   PetscInt          spnr,*ia,*ja;
1251   Mat_MPIAIJ        *b = NULL;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1255   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1256 
1257   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1258   if (denseB) {
1259     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1260     mumps->id.ICNTL(20)= 0; /* dense RHS */
1261   } else { /* sparse B */
1262     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1263     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1264     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1265                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1266       PetscCall(MatTransposeGetMat(B,&Bt));
1267     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1268     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1269   }
1270 
1271   PetscCall(MatGetSize(B,&M,&nrhs));
1272   mumps->id.nrhs = nrhs;
1273   mumps->id.lrhs = M;
1274   mumps->id.rhs  = NULL;
1275 
1276   if (mumps->petsc_size == 1) {
1277     PetscScalar *aa;
1278     PetscInt    spnr,*ia,*ja;
1279     PetscBool   second_solve = PETSC_FALSE;
1280 
1281     PetscCall(MatDenseGetArray(X,&array));
1282     mumps->id.rhs = (MumpsScalar*)array;
1283 
1284     if (denseB) {
1285       /* copy B to X */
1286       PetscCall(MatDenseGetArrayRead(B,&rbray));
1287       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1288       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1289     } else { /* sparse B */
1290       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1291       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1292       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1293       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1294       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1295     }
1296     /* handle condensation step of Schur complement (if any) */
1297     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1298       second_solve = PETSC_TRUE;
1299       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1300     }
1301     /* solve phase */
1302     /*-------------*/
1303     mumps->id.job = JOB_SOLVE;
1304     PetscMUMPS_c(mumps);
1305     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1306 
1307     /* handle expansion step of Schur complement (if any) */
1308     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1309     if (!denseB) { /* sparse B */
1310       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1311       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1312       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1313     }
1314     PetscCall(MatDenseRestoreArray(X,&array));
1315     PetscFunctionReturn(0);
1316   }
1317 
1318   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1319   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1320 
1321   /* create msol_loc to hold mumps local solution */
1322   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1323   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1324 
1325   lsol_loc  = mumps->id.lsol_loc;
1326   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1327   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1328   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1329   mumps->id.isol_loc = isol_loc;
1330 
1331   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1332 
1333   if (denseB) {
1334     if (mumps->ICNTL20 == 10) {
1335       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1336       PetscCall(MatDenseGetArrayRead(B,&rbray));
1337       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1338       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1339       PetscCall(MatGetLocalSize(B,&m,NULL));
1340       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1341     } else {
1342       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1343       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented with MPI_Gather,
1344         resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B onto rank 0
1345         and then, on rank 0, re-arrange B into the desired order, which is a local operation.
1346       */
1347 
1348       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1349       /* wrap dense rhs matrix B into a vector v_mpi */
1350       PetscCall(MatGetLocalSize(B,&m,NULL));
1351       PetscCall(MatDenseGetArray(B,&bray));
1352       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1353       PetscCall(MatDenseRestoreArray(B,&bray));
1354 
1355       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1356       if (!mumps->myid) {
1357         PetscInt *idx;
1358         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1359         PetscCall(PetscMalloc1(nrhs*M,&idx));
1360         PetscCall(MatGetOwnershipRanges(B,&rstart));
1361         k = 0;
1362         for (proc=0; proc<mumps->petsc_size; proc++) {
1363           for (j=0; j<nrhs; j++) {
1364             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1365           }
1366         }
1367 
1368         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1369         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1370         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1371       } else {
1372         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1373         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1374         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1375       }
1376       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1377       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1378       PetscCall(ISDestroy(&is_to));
1379       PetscCall(ISDestroy(&is_from));
1380       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1381 
1382       if (!mumps->myid) { /* define rhs on the host */
1383         PetscCall(VecGetArray(b_seq,&bray));
1384         mumps->id.rhs = (MumpsScalar*)bray;
1385         PetscCall(VecRestoreArray(b_seq,&bray));
1386       }
1387     }
1388   } else { /* sparse B */
1389     b = (Mat_MPIAIJ*)Bt->data;
1390 
1391     /* wrap dense X into a vector v_mpi */
1392     PetscCall(MatGetLocalSize(X,&m,NULL));
1393     PetscCall(MatDenseGetArray(X,&bray));
1394     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1395     PetscCall(MatDenseRestoreArray(X,&bray));
1396 
1397     if (!mumps->myid) {
1398       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1399       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1400       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1401       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1402       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1403     } else {
1404       mumps->id.irhs_ptr    = NULL;
1405       mumps->id.irhs_sparse = NULL;
1406       mumps->id.nz_rhs      = 0;
1407       mumps->id.rhs_sparse  = NULL;
1408     }
1409   }
1410 
1411   /* solve phase */
1412   /*-------------*/
1413   mumps->id.job = JOB_SOLVE;
1414   PetscMUMPS_c(mumps);
1415   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1416 
1417   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1418   PetscCall(MatDenseGetArray(X,&array));
1419   PetscCall(VecPlaceArray(v_mpi,array));
1420 
1421   /* create scatter scat_sol */
1422   PetscCall(MatGetOwnershipRanges(X,&rstart));
1423   /* idxx[]: indices used to scatter the MUMPS distributed solution msol_loc into the PETSc vector v_mpi (which wraps X) */
1424 
1425   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1426   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
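  /* Build idxx[]: the MUMPS value for local entry i of column j sits at position i+j*lsol_loc of msol_loc, while its
     destination in v_mpi (X stored rank by rank, column-major within each rank's block of m rows) is
     myrstart*nrhs + (isol_loc[i]-myrstart) + j*m, computed below */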
1427   for (i=0; i<lsol_loc; i++) {
1428     isol_loc[i] -= 1; /* change Fortran style to C style. sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs */
1429 
1430     for (proc=0; proc<mumps->petsc_size; proc++) {
1431       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1432         myrstart = rstart[proc];
1433         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1434         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1435         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1436         break;
1437       }
1438     }
1439 
1440     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1441   }
1442   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1443   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1444   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1445   PetscCall(ISDestroy(&is_from));
1446   PetscCall(ISDestroy(&is_to));
1447   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1448   PetscCall(MatDenseRestoreArray(X,&array));
1449 
1450   /* free spaces */
1451   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1452   mumps->id.isol_loc = isol_loc_save;
1453 
1454   PetscCall(PetscFree2(sol_loc,isol_loc));
1455   PetscCall(PetscFree(idxx));
1456   PetscCall(VecDestroy(&msol_loc));
1457   PetscCall(VecDestroy(&v_mpi));
1458   if (!denseB) {
1459     if (!mumps->myid) {
1460       b = (Mat_MPIAIJ*)Bt->data;
1461       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1462       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1463       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1464     }
1465   } else {
1466     if (mumps->ICNTL20 == 0) {
1467       PetscCall(VecDestroy(&b_seq));
1468       PetscCall(VecScatterDestroy(&scat_rhs));
1469     }
1470   }
1471   PetscCall(VecScatterDestroy(&scat_sol));
1472   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A,Mat B,Mat X)
1477 {
1478   Mat_MUMPS     *mumps=(Mat_MUMPS*)A->data;
1479   PetscMUMPSInt oldvalue = mumps->id.ICNTL(9);
1480 
1481   PetscFunctionBegin;
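  /* ICNTL(9)=1 solves A x = b, any other value solves A^T x = b; temporarily set it to 0 so that
     MatMatSolve_MUMPS() performs the transposed solve, then restore the saved value */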
1482   mumps->id.ICNTL(9) = 0;
1483   PetscCall(MatMatSolve_MUMPS(A,B,X));
1484   mumps->id.ICNTL(9) = oldvalue;
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489 {
1490   PetscBool      flg;
1491   Mat            B;
1492 
1493   PetscFunctionBegin;
1494   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1495   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1496 
1497   /* Create B=Bt^T that uses Bt's data structure */
1498   PetscCall(MatCreateTranspose(Bt,&B));
1499 
1500   PetscCall(MatMatSolve_MUMPS(A,B,X));
1501   PetscCall(MatDestroy(&B));
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #if !defined(PETSC_USE_COMPLEX)
1506 /*
1507   input:
1508    F:        numeric factor
1509   output:
1510    nneg:     total number of negative pivots
1511    nzero:    total number of zero pivots
1512    npos:     (global dimension of F) - nneg - nzero
1513 */
1514 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1515 {
1516   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1517   PetscMPIInt    size;
1518 
1519   PetscFunctionBegin;
1520   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1521   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1522   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1523 
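  /* for symmetric (SYM=1/2) factorizations INFOG(12) is the number of negative pivots and INFOG(28) the number of
     null pivots detected, which requires ICNTL(24)=1 (checked below) */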
1524   if (nneg) *nneg = mumps->id.INFOG(12);
1525   if (nzero || npos) {
1526     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1527     if (nzero) *nzero = mumps->id.INFOG(28);
1528     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 #endif
1533 
1534 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1535 {
1536   PetscInt       i,nreqs;
1537   PetscMUMPSInt  *irn,*jcn;
1538   PetscMPIInt    count;
1539   PetscInt64     totnnz,remain;
1540   const PetscInt osize=mumps->omp_comm_size;
1541   PetscScalar    *val;
1542 
1543   PetscFunctionBegin;
1544   if (osize > 1) {
1545     if (reuse == MAT_INITIAL_MATRIX) {
1546       /* master first gathers counts of nonzeros to receive */
1547       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1548       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1549 
1550       /* Then each rank computes its number of sends/recvs */
1551       if (mumps->is_omp_master) {
1552         /* Start from 1 since self communication is not done in MPI */
1553         nreqs = 0;
1554         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1555       } else {
1556         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1557       }
1558       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1559 
1560       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1561          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz might
1562          even be a prime > 2^31-1 (so the count cannot be shrunk with a larger contiguous datatype), we slice the
1563          message. Since omp_comm_size is very small, the current approach should add no extra overhead over MPI_Gatherv.
1564        */
1565       nreqs = 0; /* counter for actual send/recvs */
1566       if (mumps->is_omp_master) {
1567         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1568         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1569         PetscCall(PetscMalloc1(totnnz,&val));
1570 
1571         /* Self communication */
1572         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1573         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1574         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1575 
1576         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1577         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1578         PetscCall(PetscFree(mumps->val_alloc));
1579         mumps->nnz = totnnz;
1580         mumps->irn = irn;
1581         mumps->jcn = jcn;
1582         mumps->val = mumps->val_alloc = val;
1583 
1584         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1585         jcn += mumps->recvcount[0];
1586         val += mumps->recvcount[0];
1587 
1588         /* Remote communication */
1589         for (i=1; i<osize; i++) {
1590           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1591           remain = mumps->recvcount[i] - count;
1592           while (count>0) {
1593             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1594             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1595             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1596             irn    += count;
1597             jcn    += count;
1598             val    += count;
1599             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1600             remain -= count;
1601           }
1602         }
1603       } else {
1604         irn    = mumps->irn;
1605         jcn    = mumps->jcn;
1606         val    = mumps->val;
1607         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1608         remain = mumps->nnz - count;
1609         while (count>0) {
1610           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1611           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1612           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1613           irn    += count;
1614           jcn    += count;
1615           val    += count;
1616           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1617           remain -= count;
1618         }
1619       }
1620     } else {
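      /* MAT_REUSE_MATRIX: the nonzero pattern (irn/jcn) gathered in the MAT_INITIAL_MATRIX call is still valid,
         so only the numerical values val need to be re-gathered on the omp master */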
1621       nreqs = 0;
1622       if (mumps->is_omp_master) {
1623         val = mumps->val + mumps->recvcount[0];
1624         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1625           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1626           remain = mumps->recvcount[i] - count;
1627           while (count>0) {
1628             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1629             val    += count;
1630             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1631             remain -= count;
1632           }
1633         }
1634       } else {
1635         val    = mumps->val;
1636         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1637         remain = mumps->nnz - count;
1638         while (count>0) {
1639           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1640           val    += count;
1641           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1642           remain -= count;
1643         }
1644       }
1645     }
1646     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1647     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1653 {
1654   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1655   PetscBool      isMPIAIJ;
1656 
1657   PetscFunctionBegin;
1657   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 is the empty matrix case */
1659     if (mumps->id.INFOG(1) == -6) {
1660       PetscCall(PetscInfo(A,"MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1661     }
1662     PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1663     PetscFunctionReturn(0);
1664   }
1665 
1666   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1667   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1668 
1669   /* numerical factorization phase */
1670   /*-------------------------------*/
1671   mumps->id.job = JOB_FACTNUMERIC;
1672   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1673     if (!mumps->myid) {
1674       mumps->id.a = (MumpsScalar*)mumps->val;
1675     }
1676   } else {
1677     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1678   }
1679   PetscMUMPS_c(mumps);
1680   if (mumps->id.INFOG(1) < 0) {
1681     PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1682     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1683       PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1684       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1685     } else if (mumps->id.INFOG(1) == -13) {
1686       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1687       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1688     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1689       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1690       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1691     } else {
1692       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1693       F->factorerrortype = MAT_FACTOR_OTHER;
1694     }
1695   }
1696   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.ICNTL(16));
1697 
1698   F->assembled    = PETSC_TRUE;
1699 
1700   if (F->schur) { /* reset Schur status to unfactored */
1701 #if defined(PETSC_HAVE_CUDA)
1702     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1703 #endif
1704     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1705       mumps->id.ICNTL(19) = 2;
1706       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1707     }
1708     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1709   }
1710 
1711   /* just to be sure that the ICNTL(19) value returned by a later call to MatMumpsGetIcntl() is always consistent */
1712   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1713 
1714   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1715   if (mumps->petsc_size > 1) {
1716     PetscInt    lsol_loc;
1717     PetscScalar *sol_loc;
1718 
1719     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1720 
1721     /* distributed solution; Create x_seq=sol_loc for repeated use */
1722     if (mumps->x_seq) {
1723       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1724       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1725       PetscCall(VecDestroy(&mumps->x_seq));
1726     }
1727     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1728     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1729     mumps->id.lsol_loc = lsol_loc;
1730     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1731     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1732   }
1733   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 /* Sets MUMPS options from the options database */
1738 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1739 {
1740   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1741   PetscMUMPSInt  icntl=0,size,*listvar_schur;
1742   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1743   PetscBool      flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1);
1744   MumpsScalar    *arr;
1745 
1746   PetscFunctionBegin;
1747   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat");
1748   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1749     PetscInt nthreads = 0;
1750     PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1751     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
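    /* ICNTL_pre/CNTL_pre cache ICNTL/CNTL values set by the user before the first MUMPS call: entry [0] holds the
       number of cached pairs, followed by (index,value) pairs that are re-applied right after JOB_INIT below */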
1752 
1753     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1754     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1755     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1756 
1757     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support));
1758     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1759     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1760     PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1761     if (mumps->use_petsc_omp_support) {
1762       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1763       PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1764 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1765       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1766       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1767 #endif
1768     } else {
1769       mumps->omp_comm      = PETSC_COMM_SELF;
1770       mumps->mumps_comm    = mumps->petsc_comm;
1771       mumps->is_omp_master = PETSC_TRUE;
1772     }
1773     PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1774     mumps->reqs = NULL;
1775     mumps->tag  = 0;
1776 
1777     if (mumps->mumps_comm != MPI_COMM_NULL) {
1778       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1779         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1780         MPI_Comm comm;
1781         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm));
1782         mumps->mumps_comm = comm;
1783       } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm));
1784     }
1785 
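    /* MUMPS expects the communicator as a Fortran handle, hence the MPI_Comm_c2f() conversion */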
1786     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1787     mumps->id.job = JOB_INIT;
1788     mumps->id.par = 1;  /* host participates in factorization and solve */
1789     mumps->id.sym = mumps->sym;
1790 
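    /* stash size_schur/schur/listvar_schur: the JOB_INIT call below initializes the MUMPS structure and would
       lose them; they are restored in the "if (schur)" block further down */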
1791     size = mumps->id.size_schur;
1792     arr = mumps->id.schur;
1793     listvar_schur = mumps->id.listvar_schur;
1794     PetscMUMPS_c(mumps);
1795     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1796     /* restore cached ICNTL and CNTL values */
1797     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl];
1798     for (icntl = 0; icntl < nCNTL_pre; ++icntl)  mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl];
1799     PetscCall(PetscFree(mumps->ICNTL_pre));
1800     PetscCall(PetscFree(mumps->CNTL_pre));
1801 
1802     if (schur) {
1803       mumps->id.size_schur = size;
1804       mumps->id.schur_lld  = size;
1805       mumps->id.schur = arr;
1806       mumps->id.listvar_schur = listvar_schur;
1807       if (mumps->petsc_size > 1) {
1808         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1809 
1810         mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
1811         gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1812         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
1813         PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc");
1814       } else {
1815         if (F->factortype == MAT_FACTOR_LU) {
1816           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1817         } else {
1818           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1819         }
1820       }
1821       mumps->id.ICNTL(26) = -1;
1822     }
1823 
1824     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1825        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1826      */
1827     PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1828     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1829 
1830     mumps->scat_rhs = NULL;
1831     mumps->scat_sol = NULL;
1832 
1833     /* set PETSc-MUMPS default options - override MUMPS default */
1834     mumps->id.ICNTL(3) = 0;
1835     mumps->id.ICNTL(4) = 0;
1836     if (mumps->petsc_size == 1) {
1837       mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1838       mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1839     } else {
1840       mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1841       mumps->id.ICNTL(21) = 1;   /* distributed solution */
1842     }
1843   }
1844   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1845   if (flg) mumps->id.ICNTL(1) = icntl;
1846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1847   if (flg) mumps->id.ICNTL(2) = icntl;
1848   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1849   if (flg) mumps->id.ICNTL(3) = icntl;
1850 
1851   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1852   if (flg) mumps->id.ICNTL(4) = icntl;
1853   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1854 
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1856   if (flg) mumps->id.ICNTL(6) = icntl;
1857 
1858   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1859   if (flg) {
1860     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1861     mumps->id.ICNTL(7) = icntl;
1862   }
1863 
1864   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1865   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1866   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1867   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1868   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1869   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1870   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1871   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
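  /* a negative ICNTL(15) requests compression of the input matrix with a uniform block size of -ICNTL(15)
     (hence the multiple-of-blocksize check below); here it is derived from the PETSc block size when rbs == cbs */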
1872   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1873   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1874   if (flg) {
1875     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1876     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1877   }
1878   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1879   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1880     PetscCall(MatDestroy(&F->schur));
1881     PetscCall(MatMumpsResetSchur_Private(mumps));
1882   }
1883 
1884   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1885      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1886      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1887      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1888      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1889      In short, we could not use distributed RHS with MPICH until v4.0b1.
1890    */
1891 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1892   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1893 #else
1894   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1895 #endif
1896   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1897   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1898 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1899   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1900 #endif
1901   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1902 
1903   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1904   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1905   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1906   if (mumps->id.ICNTL(24)) {
1907     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1908   }
1909 
1910   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1911   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is involved","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1912   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1913   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1914   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1915   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1916   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1917   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1918   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1919   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1920   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1921   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1922 
1923   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1924   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1925   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1926   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1927   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1928   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1929 
1930   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1931 
1932   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1933   if (ninfo) {
1934     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1935     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1936     mumps->ninfo = ninfo;
1937     for (i=0; i<ninfo; i++) {
1938       PetscCheck(info[i] >= 0 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1939       mumps->info[i] = info[i];
1940     }
1941   }
1942   PetscOptionsEnd();
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1947 {
1948   PetscFunctionBegin;
1949   if (mumps->id.INFOG(1) < 0) {
1950     PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1951     if (mumps->id.INFOG(1) == -6) {
1952       PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1953       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1954     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1955       PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1956       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1957     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1958       PetscCall(PetscInfo(F,"Empty matrix\n"));
1959     } else {
1960       PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1961       F->factorerrortype = MAT_FACTOR_OTHER;
1962     }
1963   }
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1968 {
1969   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1970   Vec            b;
1971   const PetscInt M = A->rmap->N;
1972 
1973   PetscFunctionBegin;
1974   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1975     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1976     PetscFunctionReturn(0);
1977   }
1978 
1979   /* Set MUMPS options from the options database */
1980   PetscCall(MatSetFromOptions_MUMPS(F,A));
1981 
1982   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1983   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1984 
1985   /* analysis phase */
1986   /*----------------*/
1987   mumps->id.job = JOB_FACTSYMBOLIC;
1988   mumps->id.n   = M;
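  /* hand the assembled triples to MUMPS: with ICNTL(18)=0 only the host supplies the complete nnz/irn/jcn,
     with ICNTL(18)=3 every rank supplies its local nnz_loc/irn_loc/jcn_loc */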
1989   switch (mumps->id.ICNTL(18)) {
1990   case 0:  /* centralized assembled matrix input */
1991     if (!mumps->myid) {
1992       mumps->id.nnz = mumps->nnz;
1993       mumps->id.irn = mumps->irn;
1994       mumps->id.jcn = mumps->jcn;
1995       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
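      /* a user-supplied row permutation r is handed to MUMPS as a given ordering: ICNTL(7)=1 together with perm_in[] */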
1996       if (r) {
1997         mumps->id.ICNTL(7) = 1;
1998         if (!mumps->myid) {
1999           const PetscInt *idx;
2000           PetscInt       i;
2001 
2002           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
2003           PetscCall(ISGetIndices(r,&idx));
2004           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2005           PetscCall(ISRestoreIndices(r,&idx));
2006         }
2007       }
2008     }
2009     break;
2010   case 3:  /* distributed assembled matrix input (size>1) */
2011     mumps->id.nnz_loc = mumps->nnz;
2012     mumps->id.irn_loc = mumps->irn;
2013     mumps->id.jcn_loc = mumps->jcn;
2014     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
2015     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2016       PetscCall(MatCreateVecs(A,NULL,&b));
2017       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2018       PetscCall(VecDestroy(&b));
2019     }
2020     break;
2021   }
2022   PetscMUMPS_c(mumps);
2023   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2024 
2025   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2026   F->ops->solve           = MatSolve_MUMPS;
2027   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2028   F->ops->matsolve        = MatMatSolve_MUMPS;
2029   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2030   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2031 
2032   mumps->matstruc = SAME_NONZERO_PATTERN;
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 /* Note the Petsc r and c permutations are ignored */
2037 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2038 {
2039   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2040   Vec            b;
2041   const PetscInt M = A->rmap->N;
2042 
2043   PetscFunctionBegin;
2044   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2045     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2046     PetscFunctionReturn(0);
2047   }
2048 
2049   /* Set MUMPS options from the options database */
2050   PetscCall(MatSetFromOptions_MUMPS(F,A));
2051 
2052   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2053   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2054 
2055   /* analysis phase */
2056   /*----------------*/
2057   mumps->id.job = JOB_FACTSYMBOLIC;
2058   mumps->id.n   = M;
2059   switch (mumps->id.ICNTL(18)) {
2060   case 0:  /* centralized assembled matrix input */
2061     if (!mumps->myid) {
2062       mumps->id.nnz = mumps->nnz;
2063       mumps->id.irn = mumps->irn;
2064       mumps->id.jcn = mumps->jcn;
2065       if (mumps->id.ICNTL(6)>1) {
2066         mumps->id.a = (MumpsScalar*)mumps->val;
2067       }
2068     }
2069     break;
2070   case 3:  /* distributed assembled matrix input (size>1) */
2071     mumps->id.nnz_loc = mumps->nnz;
2072     mumps->id.irn_loc = mumps->irn;
2073     mumps->id.jcn_loc = mumps->jcn;
2074     if (mumps->id.ICNTL(6)>1) {
2075       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2076     }
2077     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2078       PetscCall(MatCreateVecs(A,NULL,&b));
2079       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2080       PetscCall(VecDestroy(&b));
2081     }
2082     break;
2083   }
2084   PetscMUMPS_c(mumps);
2085   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2086 
2087   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2088   F->ops->solve             = MatSolve_MUMPS;
2089   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2090   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2091 
2092   mumps->matstruc = SAME_NONZERO_PATTERN;
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 /* Note the Petsc r permutation and factor info are ignored */
2097 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2098 {
2099   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2100   Vec            b;
2101   const PetscInt M = A->rmap->N;
2102 
2103   PetscFunctionBegin;
2104   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2105     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2106     PetscFunctionReturn(0);
2107   }
2108 
2109   /* Set MUMPS options from the options database */
2110   PetscCall(MatSetFromOptions_MUMPS(F,A));
2111 
2112   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2113   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2114 
2115   /* analysis phase */
2116   /*----------------*/
2117   mumps->id.job = JOB_FACTSYMBOLIC;
2118   mumps->id.n   = M;
2119   switch (mumps->id.ICNTL(18)) {
2120   case 0:  /* centralized assembled matrix input */
2121     if (!mumps->myid) {
2122       mumps->id.nnz = mumps->nnz;
2123       mumps->id.irn = mumps->irn;
2124       mumps->id.jcn = mumps->jcn;
2125       if (mumps->id.ICNTL(6)>1) {
2126         mumps->id.a = (MumpsScalar*)mumps->val;
2127       }
2128     }
2129     break;
2130   case 3:  /* distributed assembled matrix input (size>1) */
2131     mumps->id.nnz_loc = mumps->nnz;
2132     mumps->id.irn_loc = mumps->irn;
2133     mumps->id.jcn_loc = mumps->jcn;
2134     if (mumps->id.ICNTL(6)>1) {
2135       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2136     }
2137     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2138       PetscCall(MatCreateVecs(A,NULL,&b));
2139       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2140       PetscCall(VecDestroy(&b));
2141     }
2142     break;
2143   }
2144   PetscMUMPS_c(mumps);
2145   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2146 
2147   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2148   F->ops->solve                 = MatSolve_MUMPS;
2149   F->ops->solvetranspose        = MatSolve_MUMPS;
2150   F->ops->matsolve              = MatMatSolve_MUMPS;
2151   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2152   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2153 #if defined(PETSC_USE_COMPLEX)
2154   F->ops->getinertia = NULL;
2155 #else
2156   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2157 #endif
2158 
2159   mumps->matstruc = SAME_NONZERO_PATTERN;
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2164 {
2165   PetscBool         iascii;
2166   PetscViewerFormat format;
2167   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2168 
2169   PetscFunctionBegin;
2170   /* check if matrix is mumps type */
2171   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2172 
2173   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2174   if (iascii) {
2175     PetscCall(PetscViewerGetFormat(viewer,&format));
2176     if (format == PETSC_VIEWER_ASCII_INFO) {
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2183       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2184       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2185       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7)));
2187       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2188       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2189       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2190       if (mumps->id.ICNTL(11)>0) {
2191         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2192         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2193         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2194         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2195         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2196         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2197       }
2198       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2199       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2200       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2201       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2202       /* ICNTL(16), ICNTL(17) not used */
2203       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2204       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2205       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2206       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2207       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2208       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2209 
2210       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2211       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2212       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2213       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2214       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2215       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2216 
2217       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2218       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors that may be discarded during factorization): %d\n",mumps->id.ICNTL(31)));
2219       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2220       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2221       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2222       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2223 
2224       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2225       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2226       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2227       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2228       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2229       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2230 
2231       /* information local to each processor */
2232       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2233       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2234       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2235       PetscCall(PetscViewerFlush(viewer));
2236       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2237       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2238       PetscCall(PetscViewerFlush(viewer));
2239       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2240       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2241       PetscCall(PetscViewerFlush(viewer));
2242 
2243       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2244       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2245       PetscCall(PetscViewerFlush(viewer));
2246 
2247       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2248       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2249       PetscCall(PetscViewerFlush(viewer));
2250 
2251       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2252       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2253       PetscCall(PetscViewerFlush(viewer));
2254 
2255       if (mumps->ninfo && mumps->ninfo <= 80) {
2256         PetscInt i;
2257         for (i=0; i<mumps->ninfo; i++) {
2258           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2259           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2260           PetscCall(PetscViewerFlush(viewer));
2261         }
2262       }
2263       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2264 
2265       if (!mumps->myid) { /* information from the host */
2266         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2270 
2271         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2290         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2291         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2292         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2293         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2294         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2295         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2296         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2297         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2298         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2299         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2300         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2301         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2302         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2303         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2304         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2305       }
2306     }
2307   }
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2312 {
2313   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2314 
2315   PetscFunctionBegin;
2316   info->block_size        = 1.0;
2317   info->nz_allocated      = mumps->id.INFOG(20);
2318   info->nz_used           = mumps->id.INFOG(20);
2319   info->nz_unneeded       = 0.0;
2320   info->assemblies        = 0.0;
2321   info->mallocs           = 0.0;
2322   info->memory            = 0.0;
2323   info->fill_ratio_given  = 0;
2324   info->fill_ratio_needed = 0;
2325   info->factor_mallocs    = 0;
2326   PetscFunctionReturn(0);
2327 }
2328 
2329 /* -------------------------------------------------------------------------------------------*/
2330 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2331 {
2332   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2333   const PetscScalar *arr;
2334   const PetscInt    *idxs;
2335   PetscInt          size,i;
2336 
2337   PetscFunctionBegin;
2338   PetscCall(ISGetLocalSize(is,&size));
2339   /* Schur complement matrix */
2340   PetscCall(MatDestroy(&F->schur));
2341   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2342   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2343   mumps->id.schur      = (MumpsScalar*)arr;
2344   mumps->id.size_schur = size;
2345   mumps->id.schur_lld  = size;
2346   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2347   if (mumps->sym == 1) {
2348     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2349   }
2350 
2351   /* MUMPS expects Fortran style indices */
2352   PetscCall(PetscFree(mumps->id.listvar_schur));
2353   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2354   PetscCall(ISGetIndices(is,&idxs));
2355   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2356   PetscCall(ISRestoreIndices(is,&idxs));
2357   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2358   mumps->id.ICNTL(26) = -1;
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 /* -------------------------------------------------------------------------------------------*/
2363 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2364 {
2365   Mat            St;
2366   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2367   PetscScalar    *array;
2368 #if defined(PETSC_USE_COMPLEX)
2369   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2370 #endif
2371 
2372   PetscFunctionBegin;
2373   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2374   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2375   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2376   PetscCall(MatSetType(St,MATDENSE));
2377   PetscCall(MatSetUp(St));
2378   PetscCall(MatDenseGetArray(St,&array));
2379   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2380     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2381       PetscInt i,j,N=mumps->id.size_schur;
2382       for (i=0;i<N;i++) {
2383         for (j=0;j<N;j++) {
2384 #if !defined(PETSC_USE_COMPLEX)
2385           PetscScalar val = mumps->id.schur[i*N+j];
2386 #else
2387           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2388 #endif
2389           array[j*N+i] = val;
2390         }
2391       }
2392     } else { /* stored by columns */
2393       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2394     }
2395   } else { /* either full or lower-triangular (not packed) */
2396     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2397       PetscInt i,j,N=mumps->id.size_schur;
2398       for (i=0;i<N;i++) {
2399         for (j=i;j<N;j++) {
2400 #if !defined(PETSC_USE_COMPLEX)
2401           PetscScalar val = mumps->id.schur[i*N+j];
2402 #else
2403           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2404 #endif
2405           array[i*N+j] = val;
2406           array[j*N+i] = val;
2407         }
2408       }
2409     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2410       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2411     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2412       PetscInt i,j,N=mumps->id.size_schur;
2413       for (i=0;i<N;i++) {
2414         for (j=0;j<i+1;j++) {
2415 #if !defined(PETSC_USE_COMPLEX)
2416           PetscScalar val = mumps->id.schur[i*N+j];
2417 #else
2418           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2419 #endif
2420           array[i*N+j] = val;
2421           array[j*N+i] = val;
2422         }
2423       }
2424     }
2425   }
2426   PetscCall(MatDenseRestoreArray(St,&array));
2427   *S   = St;
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 /* -------------------------------------------------------------------------------------------*/
2432 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2433 {
2434   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2435 
2436   PetscFunctionBegin;
2437   if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2438     PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2439     for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? */
2440     if (i == nICNTL_pre) { /* not already cached */
2441       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre));
2442       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre));
2443       mumps->ICNTL_pre[0]++;
2444     }
2445     mumps->ICNTL_pre[1+2*i] = icntl;
2446     PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i));
2447   } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2452 {
2453   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2454 
2455   PetscFunctionBegin;
2456   *ival = mumps->id.ICNTL(icntl);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 /*@
2461   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2462 
2463    Logically Collective on Mat
2464 
2465    Input Parameters:
2466 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2467 .  icntl - index of MUMPS parameter array ICNTL()
2468 -  ival - value of MUMPS ICNTL(icntl)
2469 
2470   Options Database:
2471 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
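
  Example Usage:
  A minimal sketch (A and F are placeholder names; A is assumed to be an assembled matrix, and the usual symbolic/numeric factorization or KSP solve follows), raising the MUMPS working-space increase ICNTL(14) to 30 percent:
$     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
$     MatMumpsSetIcntl(F,14,30);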
2472 
2473    Level: beginner
2474 
2475    References:
2476 .  * - MUMPS Users' Guide
2477 
2478 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2479 @*/
2480 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2481 {
2482   PetscFunctionBegin;
2483   PetscValidType(F,1);
2484   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2485   PetscValidLogicalCollectiveInt(F,icntl,2);
2486   PetscValidLogicalCollectiveInt(F,ival,3);
2487   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2488   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*@
2493   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2494 
2495    Logically Collective on Mat
2496 
2497    Input Parameters:
2498 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2499 -  icntl - index of MUMPS parameter array ICNTL()
2500 
2501   Output Parameter:
2502 .  ival - value of MUMPS ICNTL(icntl)
2503 
2504    Level: beginner
2505 
2506    References:
2507 .  * - MUMPS Users' Guide
2508 
2509 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2510 @*/
2511 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2512 {
2513   PetscFunctionBegin;
2514   PetscValidType(F,1);
2515   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2516   PetscValidLogicalCollectiveInt(F,icntl,2);
2517   PetscValidIntPointer(ival,3);
2518   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2519   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /* -------------------------------------------------------------------------------------------*/
2524 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2525 {
2526   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2527 
2528   PetscFunctionBegin;
2529   if (mumps->id.job == JOB_NULL) {
2530     PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2531     for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break;
2532     if (i == nCNTL_pre) {
2533       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre));
2534       else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre));
2535       mumps->CNTL_pre[0]++;
2536     }
2537     mumps->CNTL_pre[1+2*i] = icntl;
2538     mumps->CNTL_pre[2+2*i] = val;
2539   } else mumps->id.CNTL(icntl) = val;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2544 {
2545   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2546 
2547   PetscFunctionBegin;
2548   *val = mumps->id.CNTL(icntl);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 /*@
2553   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2554 
2555    Logically Collective on Mat
2556 
2557    Input Parameters:
2558 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2559 .  icntl - index of MUMPS parameter array CNTL()
2560 -  val - value of MUMPS CNTL(icntl)
2561 
2562   Options Database:
2563 .   -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
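
  Example Usage:
  A minimal sketch (F is a placeholder name for a factor obtained with MatGetFactor() and MATSOLVERMUMPS), setting the relative pivoting threshold CNTL(1):
$     MatMumpsSetCntl(F,1,0.01);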
2564 
2565    Level: beginner
2566 
2567    References:
2568 .  * - MUMPS Users' Guide
2569 
2570 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2571 @*/
2572 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2573 {
2574   PetscFunctionBegin;
2575   PetscValidType(F,1);
2576   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2577   PetscValidLogicalCollectiveInt(F,icntl,2);
2578   PetscValidLogicalCollectiveReal(F,val,3);
2579   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2580   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*@
2585   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2586 
2587    Logically Collective on Mat
2588 
2589    Input Parameters:
2590 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2591 -  icntl - index of MUMPS parameter array CNTL()
2592 
2593   Output Parameter:
2594 .  val - value of MUMPS CNTL(icntl)
2595 
2596    Level: beginner
2597 
2598    References:
2599 .  * - MUMPS Users' Guide
2600 
2601 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2602 @*/
2603 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2604 {
2605   PetscFunctionBegin;
2606   PetscValidType(F,1);
2607   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2608   PetscValidLogicalCollectiveInt(F,icntl,2);
2609   PetscValidRealPointer(val,3);
2610   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2611   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2616 {
2617   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2618 
2619   PetscFunctionBegin;
2620   *info = mumps->id.INFO(icntl);
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2625 {
2626   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2627 
2628   PetscFunctionBegin;
2629   *infog = mumps->id.INFOG(icntl);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2634 {
2635   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2636 
2637   PetscFunctionBegin;
2638   *rinfo = mumps->id.RINFO(icntl);
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2643 {
2644   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2645 
2646   PetscFunctionBegin;
2647   *rinfog = mumps->id.RINFOG(icntl);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2652 {
2653   Mat            Bt = NULL,Btseq = NULL;
2654   PetscBool      flg;
2655   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2656   PetscScalar    *aa;
2657   PetscInt       spnr,*ia,*ja,M,nrhs;
2658 
2659   PetscFunctionBegin;
2660   PetscValidPointer(spRHS,2);
2661   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2662   if (flg) {
2663     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2664   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2665 
2666   PetscCall(MatMumpsSetIcntl(F,30,1));
2667 
2668   if (mumps->petsc_size > 1) {
2669     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2670     Btseq = b->A;
2671   } else {
2672     Btseq = Bt;
2673   }
2674 
2675   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2676   mumps->id.nrhs = nrhs;
2677   mumps->id.lrhs = M;
2678   mumps->id.rhs  = NULL;
2679 
2680   if (!mumps->myid) {
2681     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2682     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2683     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2684     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2685     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2686   } else {
2687     mumps->id.irhs_ptr    = NULL;
2688     mumps->id.irhs_sparse = NULL;
2689     mumps->id.nz_rhs      = 0;
2690     mumps->id.rhs_sparse  = NULL;
2691   }
2692   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2693   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2694 
2695   /* solve phase */
2696   /*-------------*/
2697   mumps->id.job = JOB_SOLVE;
2698   PetscMUMPS_c(mumps);
2699   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2700 
2701   if (!mumps->myid) {
2702     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2703     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2704     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2705   }
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 /*@
2710   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2711 
2712    Logically Collective on Mat
2713 
2714    Input Parameters:
2715 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2716 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on process 0
2717 
2718   Output Parameter:
2719 . spRHS - requested entries of inverse of A
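
  Example Usage:
  A minimal sketch (spRHST and F are placeholder names; spRHST is a sequential MATAIJ matrix whose nonzero pattern on process 0 marks the requested entries of inv(A), and F is the MUMPS factor of A). The requested values can be read back from spRHST after the call:
$     MatCreateTranspose(spRHST,&spRHS);
$     MatMumpsGetInverse(F,spRHS);
$     MatDestroy(&spRHS);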
2720 
2721    Level: beginner
2722 
2723    References:
2724 .  * - MUMPS Users' Guide
2725 
2726 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2727 @*/
2728 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2729 {
2730   PetscFunctionBegin;
2731   PetscValidType(F,1);
2732   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2733   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2738 {
2739   Mat            spRHS;
2740 
2741   PetscFunctionBegin;
2742   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2743   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2744   PetscCall(MatDestroy(&spRHS));
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 /*@
2749   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2750 
2751    Logically Collective on Mat
2752 
2753    Input Parameters:
2754 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2755 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on process 0
2756 
2757   Output Parameter:
2758 . spRHST - requested entries of inverse of A^T
2759 
2760    Level: beginner
2761 
2762    References:
2763 .  * - MUMPS Users' Guide
2764 
2765 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2766 @*/
2767 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2768 {
2769   PetscBool      flg;
2770 
2771   PetscFunctionBegin;
2772   PetscValidType(F,1);
2773   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2774   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2775   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2776 
2777   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 /*@
2782   MatMumpsGetInfo - Get MUMPS parameter INFO()
2783 
2784    Logically Collective on Mat
2785 
2786    Input Parameters:
2787 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2788 -  icntl - index of MUMPS parameter array INFO()
2789 
2790   Output Parameter:
2791 .  ival - value of MUMPS INFO(icntl)
2792 
2793    Level: beginner
2794 
2795    References:
2796 .  * - MUMPS Users' Guide
2797 
2798 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2799 @*/
2800 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2801 {
2802   PetscFunctionBegin;
2803   PetscValidType(F,1);
2804   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2805   PetscValidIntPointer(ival,3);
2806   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 /*@
2811   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2812 
2813    Logically Collective on Mat
2814 
2815    Input Parameters:
2816 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2817 -  icntl - index of MUMPS parameter array INFOG()
2818 
2819   Output Parameter:
2820 .  ival - value of MUMPS INFOG(icntl)
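
  Example Usage:
  A minimal sketch (F is a placeholder name for the MUMPS factor, ival a PetscInt) checking the global return status INFOG(1) after a factorization; a negative value indicates an error:
$     MatMumpsGetInfog(F,1,&ival);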
2821 
2822    Level: beginner
2823 
2824    References:
2825 .  * - MUMPS Users' Guide
2826 
2827 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2828 @*/
2829 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2830 {
2831   PetscFunctionBegin;
2832   PetscValidType(F,1);
2833   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2834   PetscValidIntPointer(ival,3);
2835   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 /*@
2840   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2841 
2842    Logically Collective on Mat
2843 
2844    Input Parameters:
2845 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2846 -  icntl - index of MUMPS parameter array RINFO()
2847 
2848   Output Parameter:
2849 .  val - value of MUMPS RINFO(icntl)
2850 
2851    Level: beginner
2852 
2853    References:
2854 .  * - MUMPS Users' Guide
2855 
2856 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2857 @*/
2858 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2859 {
2860   PetscFunctionBegin;
2861   PetscValidType(F,1);
2862   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2863   PetscValidRealPointer(val,3);
2864   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 /*@
2869   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2870 
2871    Logically Collective on Mat
2872 
2873    Input Parameters:
2874 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2875 -  icntl - index of MUMPS parameter array RINFOG()
2876 
2877   Output Parameter:
2878 .  val - value of MUMPS RINFOG(icntl)
2879 
2880    Level: beginner
2881 
2882    References:
2883 .  * - MUMPS Users' Guide
2884 
2885 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2886 @*/
2887 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2888 {
2889   PetscFunctionBegin;
2890   PetscValidType(F,1);
2891   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2892   PetscValidRealPointer(val,3);
2893   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 /*MC
2898   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2899   distributed and sequential matrices via the external package MUMPS.
2900 
2901   Works with MATAIJ and MATSBAIJ matrices; LU factorization is also supported for MATBAIJ and sequential MATSELL matrices
2902 
2903   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2904 
2905   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while keeping the non-MUMPS part of your code in flat-MPI mode. See details below.
2906 
2907   Use -pc_type lu (or -pc_type cholesky) together with -pc_factor_mat_solver_type mumps to select this direct solver
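
   For example, to select this solver from code (a sketch; ksp is assumed to be a KSP already associated with the system matrix via KSPSetOperators()):
$          KSPGetPC(ksp,&pc);
$          PCSetType(pc,PCLU);
$          PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);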
2908 
2909   Options Database Keys:
2910 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2911 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2912 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2913 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2914 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2915 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2916                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2917 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2918 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2919 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2920 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2921 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2922 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2923 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2924 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2925 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2926 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2927 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
2928 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2929 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2930 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2931 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2932 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2933 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2934 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2935 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2936 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2937 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2938 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2939 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2940 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2941 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2942 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2943 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2944 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2945 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2946                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
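
   A typical command line for a linear solve with MUMPS (a sketch; ./ex stands for any PETSc program that sets up a KSP and calls KSPSetFromOptions()):
$          mpiexec -n 4 ./ex -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2 -mat_mumps_icntl_14 30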
2947 
2948   Level: beginner
2949 
2950     Notes:
2951     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2952 
2953     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2954     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2955 
2956     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2957 $          KSPGetPC(ksp,&pc);
2958 $          PCFactorGetMatrix(pc,&mat);
2959 $          MatMumpsGetInfo(mat,....);
2960 $          MatMumpsGetInfog(mat,....); etc.
2961            Alternatively, run with -ksp_error_if_not_converged and the program will stop with the information printed in the error message.
2962 
2963   Using MUMPS with 64-bit integers
2964     MUMPS provides 64-bit integer support in two build modes:
2965       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2966       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2967 
2968       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2969       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2970       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2971       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2972 
2973     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2974 
2975   Two modes to run MUMPS/PETSc with OpenMP
2976 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2977 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2978 
2979 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2980 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2981 
2982    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2983    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2984    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2985    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2986    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2987 
2988    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2989    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2990    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2991    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might have been set
2992    by CPU binding policies in job scripts) and make them available to the master rank so that the OpenMP threads spawned by MUMPS can run on them.
2993    On multi-socket compute nodes, MPI rank mapping is an issue. Continuing the above example, suppose each compute node has two sockets.
2994    If MPI ranks are interleaved across the two sockets (even ranks placed on socket 0, odd ranks on socket 1) and bound to cores, then with
2995    -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores from socket 0 and half from socket 1,
2996    which definitely hurts locality. On the other hand, if MPI ranks are mapped consecutively onto the two sockets, the problem does not occur.
2997    Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2998    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2999    examine the mapping result.
3000 
3001    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
3002    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
3003    calls omp_set_num_threads(m) internally before calling MUMPS.
3004 
3005    References:
3006 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3007 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3008 
3009 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3010 
3011 M*/
3012 
3013 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
3014 {
3015   PetscFunctionBegin;
3016   *type = MATSOLVERMUMPS;
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 /* MatGetFactor for Seq and MPI AIJ matrices */
3021 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
3022 {
3023   Mat            B;
3024   Mat_MUMPS      *mumps;
3025   PetscBool      isSeqAIJ;
3026   PetscMPIInt    size;
3027 
3028   PetscFunctionBegin;
3029  #if defined(PETSC_USE_COMPLEX)
3030   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3031  #endif
3032   /* Create the factorization matrix */
3033   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
3034   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3035   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3036   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3037   PetscCall(MatSetUp(B));
3038 
3039   PetscCall(PetscNewLog(B,&mumps));
3040 
3041   B->ops->view    = MatView_MUMPS;
3042   B->ops->getinfo = MatGetInfo_MUMPS;
3043 
3044   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3045   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3046   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3047   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3048   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3049   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3050   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3051   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3052   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3053   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3054   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3055   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3056   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3057 
3058   if (ftype == MAT_FACTOR_LU) {
3059     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3060     B->factortype            = MAT_FACTOR_LU;
3061     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3062     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3063     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3064     mumps->sym = 0;
3065   } else {
3066     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3067     B->factortype                  = MAT_FACTOR_CHOLESKY;
3068     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3069     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3070     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3071 #if defined(PETSC_USE_COMPLEX)
3072     mumps->sym = 2;
3073 #else
3074     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3075     else                            mumps->sym = 2;
3076 #endif
3077   }
3078 
3079   /* set solvertype */
3080   PetscCall(PetscFree(B->solvertype));
3081   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3082   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3083   if (size == 1) {
3084     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3085     B->canuseordering = PETSC_TRUE;
3086   }
3087   B->ops->destroy = MatDestroy_MUMPS;
3088   B->data         = (void*)mumps;
3089 
3090   *F = B;
3091   mumps->id.job = JOB_NULL;
3092   mumps->ICNTL_pre = NULL;
3093   mumps->CNTL_pre = NULL;
3094   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3099 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3100 {
3101   Mat            B;
3102   Mat_MUMPS      *mumps;
3103   PetscBool      isSeqSBAIJ;
3104   PetscMPIInt    size;
3105 
3106   PetscFunctionBegin;
3107  #if defined(PETSC_USE_COMPLEX)
3108   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3109  #endif
3110   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3111   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3112   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3113   PetscCall(MatSetUp(B));
3114 
3115   PetscCall(PetscNewLog(B,&mumps));
3116   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3117   if (isSeqSBAIJ) {
3118     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3119   } else {
3120     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3121   }
3122 
3123   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3124   B->ops->view                   = MatView_MUMPS;
3125   B->ops->getinfo                = MatGetInfo_MUMPS;
3126 
3127   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3128   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3129   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3130   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3131   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3132   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3133   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3134   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3135   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3136   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3137   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3138   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3139   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3140 
3141   B->factortype = MAT_FACTOR_CHOLESKY;
3142 #if defined(PETSC_USE_COMPLEX)
3143   mumps->sym = 2;
3144 #else
3145   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3146   else                            mumps->sym = 2;
3147 #endif
3148 
3149   /* set solvertype */
3150   PetscCall(PetscFree(B->solvertype));
3151   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3152   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3153   if (size == 1) {
3154     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3155     B->canuseordering = PETSC_TRUE;
3156   }
3157   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3158   B->ops->destroy = MatDestroy_MUMPS;
3159   B->data         = (void*)mumps;
3160 
3161   *F = B;
3162   mumps->id.job = JOB_NULL;
3163   mumps->ICNTL_pre = NULL;
3164   mumps->CNTL_pre = NULL;
3165   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3170 {
3171   Mat            B;
3172   Mat_MUMPS      *mumps;
3173   PetscBool      isSeqBAIJ;
3174   PetscMPIInt    size;
3175 
3176   PetscFunctionBegin;
3177   /* Create the factorization matrix */
3178   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3179   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3180   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3181   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3182   PetscCall(MatSetUp(B));
3183 
3184   PetscCall(PetscNewLog(B,&mumps));
3185   if (ftype == MAT_FACTOR_LU) {
3186     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3187     B->factortype            = MAT_FACTOR_LU;
3188     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3189     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3190     mumps->sym = 0;
3191     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3192   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3193 
3194   B->ops->view        = MatView_MUMPS;
3195   B->ops->getinfo     = MatGetInfo_MUMPS;
3196 
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3202   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3204   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3205   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3206   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3207   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3208   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3209   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3210 
3211   /* set solvertype */
3212   PetscCall(PetscFree(B->solvertype));
3213   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3214   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3215   if (size == 1) {
3216     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3217     B->canuseordering = PETSC_TRUE;
3218   }
3219   B->ops->destroy = MatDestroy_MUMPS;
3220   B->data         = (void*)mumps;
3221 
3222   *F = B;
3223   mumps->id.job = JOB_NULL;
3224   mumps->ICNTL_pre = NULL;
3225   mumps->CNTL_pre = NULL;
3226   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 /* MatGetFactor for Seq and MPI SELL matrices */
3231 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3232 {
3233   Mat            B;
3234   Mat_MUMPS      *mumps;
3235   PetscBool      isSeqSELL;
3236   PetscMPIInt    size;
3237 
3238   PetscFunctionBegin;
3239   /* Create the factorization matrix */
3240   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3241   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3242   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3243   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3244   PetscCall(MatSetUp(B));
3245 
3246   PetscCall(PetscNewLog(B,&mumps));
3247 
3248   B->ops->view        = MatView_MUMPS;
3249   B->ops->getinfo     = MatGetInfo_MUMPS;
3250 
3251   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3252   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3253   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3254   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3255   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3256   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3257   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3258   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3259   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3260   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3261   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3262 
3263   if (ftype == MAT_FACTOR_LU) {
3264     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3265     B->factortype            = MAT_FACTOR_LU;
3266     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3267     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3268     mumps->sym = 0;
3269     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3270   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3271 
3272   /* set solvertype */
3273   PetscCall(PetscFree(B->solvertype));
3274   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3275   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3276   if (size == 1) {
3277     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3278     B->canuseordering = PETSC_TRUE;
3279   }
3280   B->ops->destroy = MatDestroy_MUMPS;
3281   B->data         = (void*)mumps;
3282 
3283   *F = B;
3284   mumps->id.job = JOB_NULL;
3285   mumps->ICNTL_pre = NULL;
3286   mumps->CNTL_pre = NULL;
3287   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3288   PetscFunctionReturn(0);
3289 }
3290 
3291 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3292 {
3293   PetscFunctionBegin;
3294   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3295   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3296   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3297   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3298   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3299   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3300   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3301   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3302   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3303   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3304   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3305   PetscFunctionReturn(0);
3306 }
3307